# lib/libcrypto/asm/rsaz-x86_64.s (dragonfly.git)
.text
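# The routines below appear to be the pre-generated output of OpenSSL's
# rsaz-x86_64.pl perlasm module: 512-bit Montgomery multiplication, squaring
# and gather/scatter helpers used by RSAZ modular exponentiation.  The
# multiply/square entry points test OPENSSL_ia32cap_P+8 against 0x80100
# (the BMI2 and ADX feature bits) and take a MULX/ADCX/ADOX path when both
# are available; otherwise they use the plain MUL/ADC path.
#
# rsaz_512_sqr: %r8d repeated Montgomery squarings of the 8-limb value at
# (%rsi) into (%rdi); %rdx points at the modulus and %rcx holds the Montgomery
# constant n0.  Each pass builds the 16-limb square on the stack, reduces it
# (__rsaz_512_reduce/__rsaz_512_reducex) and conditionally subtracts the
# modulus (__rsaz_512_subtract).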
.globl rsaz_512_sqr
.type rsaz_512_sqr,@function
.align 32
rsaz_512_sqr:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $128+24,%rsp
.Lsqr_body:
movq %rdx,%rbp
movq (%rsi),%rdx
movq 8(%rsi),%rax
movq %rcx,128(%rsp)
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Loop_sqrx
jmp .Loop_sqr
.align 32
.Loop_sqr:
movl %r8d,128+8(%rsp)
movq %rdx,%rbx
mulq %rdx
movq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9
mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10
mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11
mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12
mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13
mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14
mulq %rbx
addq %rax,%r14
movq %rbx,%rax
movq %rdx,%r15
adcq $0,%r15
addq %r8,%r8
movq %r9,%rcx
adcq %r9,%r9
mulq %rax
movq %rax,(%rsp)
addq %rdx,%r8
adcq $0,%r9
movq %r8,8(%rsp)
shrq $63,%rcx
movq 8(%rsi),%r8
movq 16(%rsi),%rax
mulq %r8
addq %rax,%r10
movq 24(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx
mulq %r8
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r11
movq %rdx,%rbx
adcq $0,%rbx
mulq %r8
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r12
movq %rdx,%rbx
adcq $0,%rbx
mulq %r8
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r13
movq %rdx,%rbx
adcq $0,%rbx
mulq %r8
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r14
movq %rdx,%rbx
adcq $0,%rbx
mulq %r8
addq %rax,%r15
movq %r8,%rax
adcq $0,%rdx
addq %rbx,%r15
movq %rdx,%r8
movq %r10,%rdx
adcq $0,%r8
addq %rdx,%rdx
leaq (%rcx,%r10,2),%r10
movq %r11,%rbx
adcq %r11,%r11
mulq %rax
addq %rax,%r9
adcq %rdx,%r10
adcq $0,%r11
movq %r9,16(%rsp)
movq %r10,24(%rsp)
shrq $63,%rbx
movq 16(%rsi),%r9
movq 24(%rsi),%rax
mulq %r9
addq %rax,%r12
movq 32(%rsi),%rax
movq %rdx,%rcx
adcq $0,%rcx
mulq %r9
addq %rax,%r13
movq 40(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r13
movq %rdx,%rcx
adcq $0,%rcx
mulq %r9
addq %rax,%r14
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r14
movq %rdx,%rcx
adcq $0,%rcx
mulq %r9
movq %r12,%r10
leaq (%rbx,%r12,2),%r12
addq %rax,%r15
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r15
movq %rdx,%rcx
adcq $0,%rcx
mulq %r9
shrq $63,%r10
addq %rax,%r8
movq %r9,%rax
adcq $0,%rdx
addq %rcx,%r8
movq %rdx,%r9
adcq $0,%r9
movq %r13,%rcx
leaq (%r10,%r13,2),%r13
mulq %rax
addq %rax,%r11
adcq %rdx,%r12
adcq $0,%r13
movq %r11,32(%rsp)
movq %r12,40(%rsp)
shrq $63,%rcx
movq 24(%rsi),%r10
movq 32(%rsi),%rax
mulq %r10
addq %rax,%r14
movq 40(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx
mulq %r10
addq %rax,%r15
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r15
movq %rdx,%rbx
adcq $0,%rbx
mulq %r10
movq %r14,%r12
leaq (%rcx,%r14,2),%r14
addq %rax,%r8
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r8
movq %rdx,%rbx
adcq $0,%rbx
mulq %r10
shrq $63,%r12
addq %rax,%r9
movq %r10,%rax
adcq $0,%rdx
addq %rbx,%r9
movq %rdx,%r10
adcq $0,%r10
movq %r15,%rbx
leaq (%r12,%r15,2),%r15
mulq %rax
addq %rax,%r13
adcq %rdx,%r14
adcq $0,%r15
movq %r13,48(%rsp)
movq %r14,56(%rsp)
shrq $63,%rbx
movq 32(%rsi),%r11
movq 40(%rsi),%rax
mulq %r11
addq %rax,%r8
movq 48(%rsi),%rax
movq %rdx,%rcx
adcq $0,%rcx
mulq %r11
addq %rax,%r9
movq 56(%rsi),%rax
adcq $0,%rdx
movq %r8,%r12
leaq (%rbx,%r8,2),%r8
addq %rcx,%r9
movq %rdx,%rcx
adcq $0,%rcx
mulq %r11
shrq $63,%r12
addq %rax,%r10
movq %r11,%rax
adcq $0,%rdx
addq %rcx,%r10
movq %rdx,%r11
adcq $0,%r11
movq %r9,%rcx
leaq (%r12,%r9,2),%r9
mulq %rax
addq %rax,%r15
adcq %rdx,%r8
adcq $0,%r9
movq %r15,64(%rsp)
movq %r8,72(%rsp)
shrq $63,%rcx
movq 40(%rsi),%r12
movq 48(%rsi),%rax
mulq %r12
addq %rax,%r10
movq 56(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx
mulq %r12
addq %rax,%r11
movq %r12,%rax
movq %r10,%r15
leaq (%rcx,%r10,2),%r10
adcq $0,%rdx
shrq $63,%r15
addq %rbx,%r11
movq %rdx,%r12
adcq $0,%r12
movq %r11,%rbx
leaq (%r15,%r11,2),%r11
mulq %rax
addq %rax,%r9
adcq %rdx,%r10
adcq $0,%r11
movq %r9,80(%rsp)
movq %r10,88(%rsp)
movq 48(%rsi),%r13
movq 56(%rsi),%rax
mulq %r13
addq %rax,%r12
movq %r13,%rax
movq %rdx,%r13
adcq $0,%r13
xorq %r14,%r14
shlq $1,%rbx
adcq %r12,%r12
adcq %r13,%r13
adcq %r14,%r14
mulq %rax
addq %rax,%r11
adcq %rdx,%r12
adcq $0,%r13
movq %r11,96(%rsp)
movq %r12,104(%rsp)
movq 56(%rsi),%rax
mulq %rax
addq %rax,%r13
adcq $0,%rdx
addq %rdx,%r14
movq %r13,112(%rsp)
movq %r14,120(%rsp)
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reduce
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx
call __rsaz_512_subtract
movq %r8,%rdx
movq %r9,%rax
movl 128+8(%rsp),%r8d
movq %rdi,%rsi
decl %r8d
jnz .Loop_sqr
jmp .Lsqr_tail
.align 32
.Loop_sqrx:
movl %r8d,128+8(%rsp)
.byte 102,72,15,110,199
.byte 102,72,15,110,205
mulxq %rax,%r8,%r9
mulxq 16(%rsi),%rcx,%r10
xorq %rbp,%rbp
mulxq 24(%rsi),%rax,%r11
adcxq %rcx,%r9
mulxq 32(%rsi),%rcx,%r12
adcxq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcxq %rcx,%r11
.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r12
adcxq %rcx,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcxq %rax,%r14
adcxq %rbp,%r15
movq %r9,%rcx
shldq $1,%r8,%r9
shlq $1,%r8
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rdx,%r8
movq 8(%rsi),%rdx
adcxq %rbp,%r9
movq %rax,(%rsp)
movq %r8,8(%rsp)
mulxq 16(%rsi),%rax,%rbx
adoxq %rax,%r10
adcxq %rbx,%r11
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
adoxq %rdi,%r11
adcxq %r8,%r12
mulxq 32(%rsi),%rax,%rbx
adoxq %rax,%r12
adcxq %rbx,%r13
mulxq 40(%rsi),%rdi,%r8
adoxq %rdi,%r13
adcxq %r8,%r14
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r14
adcxq %rbx,%r15
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
adoxq %rdi,%r15
adcxq %rbp,%r8
adoxq %rbp,%r8
movq %r11,%rbx
shldq $1,%r10,%r11
shldq $1,%rcx,%r10
xorl %ebp,%ebp
mulxq %rdx,%rax,%rcx
movq 16(%rsi),%rdx
adcxq %rax,%r9
adcxq %rcx,%r10
adcxq %rbp,%r11
movq %r9,16(%rsp)
.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
adoxq %rdi,%r12
adcxq %r9,%r13
mulxq 32(%rsi),%rax,%rcx
adoxq %rax,%r13
adcxq %rcx,%r14
mulxq 40(%rsi),%rdi,%r9
adoxq %rdi,%r14
adcxq %r9,%r15
.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
adoxq %rax,%r15
adcxq %rcx,%r8
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
adoxq %rdi,%r8
adcxq %rbp,%r9
adoxq %rbp,%r9
movq %r13,%rcx
shldq $1,%r12,%r13
shldq $1,%rbx,%r12
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r11
adcxq %rdx,%r12
movq 24(%rsi),%rdx
adcxq %rbp,%r13
movq %r11,32(%rsp)
.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
adoxq %rax,%r14
adcxq %rbx,%r15
mulxq 40(%rsi),%rdi,%r10
adoxq %rdi,%r15
adcxq %r10,%r8
mulxq 48(%rsi),%rax,%rbx
adoxq %rax,%r8
adcxq %rbx,%r9
mulxq 56(%rsi),%rdi,%r10
adoxq %rdi,%r9
adcxq %rbp,%r10
adoxq %rbp,%r10
.byte 0x66
movq %r15,%rbx
shldq $1,%r14,%r15
shldq $1,%rcx,%r14
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r13
adcxq %rdx,%r14
movq 32(%rsi),%rdx
adcxq %rbp,%r15
movq %r13,48(%rsp)
movq %r14,56(%rsp)
.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
adoxq %rdi,%r8
adcxq %r11,%r9
mulxq 48(%rsi),%rax,%rcx
adoxq %rax,%r9
adcxq %rcx,%r10
mulxq 56(%rsi),%rdi,%r11
adoxq %rdi,%r10
adcxq %rbp,%r11
adoxq %rbp,%r11
movq %r9,%rcx
shldq $1,%r8,%r9
shldq $1,%rbx,%r8
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r15
adcxq %rdx,%r8
movq 40(%rsi),%rdx
adcxq %rbp,%r9
movq %r15,64(%rsp)
movq %r8,72(%rsp)
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r10
adcxq %rbx,%r11
.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
adoxq %rdi,%r11
adcxq %rbp,%r12
adoxq %rbp,%r12
movq %r11,%rbx
shldq $1,%r10,%r11
shldq $1,%rcx,%r10
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r9
adcxq %rdx,%r10
movq 48(%rsi),%rdx
adcxq %rbp,%r11
movq %r9,80(%rsp)
movq %r10,88(%rsp)
.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
adoxq %rax,%r12
adoxq %rbp,%r13
xorq %r14,%r14
shldq $1,%r13,%r14
shldq $1,%r12,%r13
shldq $1,%rbx,%r12
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r11
adcxq %rdx,%r12
movq 56(%rsi),%rdx
adcxq %rbp,%r13
.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
mulxq %rdx,%rax,%rdx
adoxq %rax,%r13
adoxq %rbp,%rdx
.byte 0x66
addq %rdx,%r14
movq %r13,112(%rsp)
movq %r14,120(%rsp)
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx
call __rsaz_512_subtract
movq %r8,%rdx
movq %r9,%rax
movl 128+8(%rsp),%r8d
movq %rdi,%rsi
decl %r8d
jnz .Loop_sqrx
.Lsqr_tail:
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Lsqr_epilogue:
.byte 0xf3,0xc3
.size rsaz_512_sqr,.-rsaz_512_sqr
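# rsaz_512_mul: one 512x512-bit Montgomery multiplication.
# %rdi = output, %rsi = multiplicand, %rdx = multiplier, %rcx = modulus,
# %r8 = n0.  The product is formed by __rsaz_512_mul (or __rsaz_512_mulx on
# the BMI2/ADX path), then reduced and conditionally subtracted.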
.globl rsaz_512_mul
.type rsaz_512_mul,@function
.align 32
rsaz_512_mul:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $128+24,%rsp
.Lmul_body:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
movq %r8,128(%rsp)
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx
movq (%rdx),%rbx
movq %rdx,%rbp
call __rsaz_512_mul
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp .Lmul_tail
.align 32
.Lmulx:
movq %rdx,%rbp
movq (%rdx),%rdx
call __rsaz_512_mulx
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
.Lmul_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.size rsaz_512_mul,.-rsaz_512_mul
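# rsaz_512_mul_gather4: Montgomery multiplication with the multiplier fetched
# from an interleaved table in constant time.  %rdi = output, %rsi =
# multiplicand, %rdx = table, %rcx = modulus, %r8 = n0, %r9d = table index.
# SSE compare masks built from .Linc select one of 16 entries per 128-byte
# row, so no load address depends on the (secret) index.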
.globl rsaz_512_mul_gather4
.type rsaz_512_mul_gather4,@function
.align 32
rsaz_512_mul_gather4:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $152,%rsp
.Lmul_gather4_body:
movd %r9d,%xmm8
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0
pshufd $0,%xmm8,%xmm8
movdqa %xmm1,%xmm7
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm8,%xmm0
movdqa %xmm7,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm8,%xmm1
movdqa %xmm7,%xmm4
paddd %xmm2,%xmm3
pcmpeqd %xmm8,%xmm2
movdqa %xmm7,%xmm5
paddd %xmm3,%xmm4
pcmpeqd %xmm8,%xmm3
movdqa %xmm7,%xmm6
paddd %xmm4,%xmm5
pcmpeqd %xmm8,%xmm4
paddd %xmm5,%xmm6
pcmpeqd %xmm8,%xmm5
paddd %xmm6,%xmm7
pcmpeqd %xmm8,%xmm6
pcmpeqd %xmm8,%xmm7
movdqa 0(%rdx),%xmm8
movdqa 16(%rdx),%xmm9
movdqa 32(%rdx),%xmm10
movdqa 48(%rdx),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rdx),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rdx),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rdx),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rdx),%xmm15
leaq 128(%rdx),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx_gather
.byte 102,76,15,126,195
movq %r8,128(%rsp)
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)
movq (%rsi),%rax
movq 8(%rsi),%rcx
mulq %rbx
movq %rax,(%rsp)
movq %rcx,%rax
movq %rdx,%r8
mulq %rbx
addq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9
adcq $0,%r9
mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10
mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11
mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12
mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13
mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14
mulq %rbx
addq %rax,%r14
movq (%rsi),%rax
movq %rdx,%r15
adcq $0,%r15
leaq 8(%rsp),%rdi
movl $7,%ecx
jmp .Loop_mul_gather
.align 32
.Loop_mul_gather:
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rbp),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rbp),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rbp),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rbp),%xmm15
leaq 128(%rbp),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
.byte 102,76,15,126,195
mulq %rbx
addq %rax,%r8
movq 8(%rsi),%rax
movq %r8,(%rdi)
movq %rdx,%r8
adcq $0,%r8
mulq %rbx
addq %rax,%r9
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9
mulq %rbx
addq %rax,%r10
movq 24(%rsi),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10
mulq %rbx
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11
mulq %rbx
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12
mulq %rbx
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13
mulq %rbx
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
adcq $0,%r14
mulq %rbx
addq %rax,%r15
movq (%rsi),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15
leaq 8(%rdi),%rdi
decl %ecx
jnz .Loop_mul_gather
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp .Lmul_gather_tail
.align 32
.Lmulx_gather:
.byte 102,76,15,126,194
movq %r8,128(%rsp)
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)
mulxq (%rsi),%rbx,%r8
movq %rbx,(%rsp)
xorl %edi,%edi
mulxq 8(%rsi),%rax,%r9
mulxq 16(%rsi),%rbx,%r10
adcxq %rax,%r8
mulxq 24(%rsi),%rax,%r11
adcxq %rbx,%r9
mulxq 32(%rsi),%rbx,%r12
adcxq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcxq %rbx,%r11
mulxq 48(%rsi),%rbx,%r14
adcxq %rax,%r12
mulxq 56(%rsi),%rax,%r15
adcxq %rbx,%r13
adcxq %rax,%r14
.byte 0x67
movq %r8,%rbx
adcxq %rdi,%r15
movq $-7,%rcx
jmp .Loop_mulx_gather
.align 32
.Loop_mulx_gather:
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rbp),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rbp),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rbp),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rbp),%xmm15
leaq 128(%rbp),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
.byte 102,76,15,126,194
.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
adcxq %rax,%rbx
adoxq %r9,%r8
mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10
.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
.byte 0x67
adoxq %r15,%r14
mulxq 56(%rsi),%rax,%r15
movq %rbx,64(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
movq %r8,%rbx
adcxq %rdi,%r15
incq %rcx
jnz .Loop_mulx_gather
movq %r8,64(%rsp)
movq %r9,64+8(%rsp)
movq %r10,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %r12,64+32(%rsp)
movq %r13,64+40(%rsp)
movq %r14,64+48(%rsp)
movq %r15,64+56(%rsp)
movq 128(%rsp),%rdx
movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
.Lmul_gather_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Lmul_gather4_epilogue:
.byte 0xf3,0xc3
.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
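# rsaz_512_mul_scatter4: Montgomery multiplication whose result is written to
# the output and also scattered into the interleaved table with a 128-byte
# stride.  As used here, %rdi is both the output and the multiplier operand,
# %rsi = multiplicand, %rdx = modulus, %rcx = n0, %r8 = table, %r9d = entry.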
.globl rsaz_512_mul_scatter4
.type rsaz_512_mul_scatter4,@function
.align 32
rsaz_512_mul_scatter4:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movl %r9d,%r9d
subq $128+24,%rsp
.Lmul_scatter4_body:
leaq (%r8,%r9,8),%r8
.byte 102,72,15,110,199
.byte 102,72,15,110,202
.byte 102,73,15,110,208
movq %rcx,128(%rsp)
movq %rdi,%rbp
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx_scatter
movq (%rdi),%rbx
call __rsaz_512_mul
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp .Lmul_scatter_tail
.align 32
.Lmulx_scatter:
movq (%rdi),%rdx
call __rsaz_512_mulx
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
.Lmul_scatter_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
.byte 102,72,15,126,214
sbbq %rcx,%rcx
call __rsaz_512_subtract
movq %r8,0(%rsi)
movq %r9,128(%rsi)
movq %r10,256(%rsi)
movq %r11,384(%rsi)
movq %r12,512(%rsi)
movq %r13,640(%rsi)
movq %r14,768(%rsi)
movq %r15,896(%rsi)
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Lmul_scatter4_epilogue:
.byte 0xf3,0xc3
.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
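# rsaz_512_mul_by_one: a single Montgomery reduction of the 8-limb value at
# (%rsi) (i.e. multiplication by 1), typically used to leave Montgomery form.
# %rdi = output, %rdx = modulus, %rcx = n0; the stack temporary is zeroed so
# only the reduction contributes to the result.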
.globl rsaz_512_mul_by_one
.type rsaz_512_mul_by_one,@function
.align 32
rsaz_512_mul_by_one:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $128+24,%rsp
.Lmul_by_one_body:
movl OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
movq %rcx,128(%rsp)
movq (%rsi),%r8
pxor %xmm0,%xmm0
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
movq 48(%rsi),%r14
movq 56(%rsi),%r15
movdqa %xmm0,(%rsp)
movdqa %xmm0,16(%rsp)
movdqa %xmm0,32(%rsp)
movdqa %xmm0,48(%rsp)
movdqa %xmm0,64(%rsp)
movdqa %xmm0,80(%rsp)
movdqa %xmm0,96(%rsp)
andl $0x80100,%eax
cmpl $0x80100,%eax
je .Lby_one_callx
call __rsaz_512_reduce
jmp .Lby_one_tail
.align 32
.Lby_one_callx:
movq 128(%rsp),%rdx
call __rsaz_512_reducex
.Lby_one_tail:
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Lmul_by_one_epilogue:
.byte 0xf3,0xc3
.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
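# __rsaz_512_reduce: word-by-word Montgomery reduction, 8 iterations.  The
# low half of the 16-limb value is in %r8-%r15, the modulus is at (%rbp), and
# n0 is read from the caller's frame (128+8(%rsp) once the return address is
# on the stack) to form each quotient digit.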
.type __rsaz_512_reduce,@function
.align 32
__rsaz_512_reduce:
movq %r8,%rbx
imulq 128+8(%rsp),%rbx
movq 0(%rbp),%rax
movl $8,%ecx
jmp .Lreduction_loop
.align 32
.Lreduction_loop:
mulq %rbx
movq 8(%rbp),%rax
negq %r8
movq %rdx,%r8
adcq $0,%r8
mulq %rbx
addq %rax,%r9
movq 16(%rbp),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9
mulq %rbx
addq %rax,%r10
movq 24(%rbp),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10
mulq %rbx
addq %rax,%r11
movq 32(%rbp),%rax
adcq $0,%rdx
addq %r11,%r10
movq 128+8(%rsp),%rsi
adcq $0,%rdx
movq %rdx,%r11
mulq %rbx
addq %rax,%r12
movq 40(%rbp),%rax
adcq $0,%rdx
imulq %r8,%rsi
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12
mulq %rbx
addq %rax,%r13
movq 48(%rbp),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13
mulq %rbx
addq %rax,%r14
movq 56(%rbp),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
adcq $0,%r14
mulq %rbx
movq %rsi,%rbx
addq %rax,%r15
movq 0(%rbp),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15
decl %ecx
jne .Lreduction_loop
.byte 0xf3,0xc3
.size __rsaz_512_reduce,.-__rsaz_512_reduce
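# __rsaz_512_reducex: the same Montgomery reduction on the MULX/ADCX/ADOX
# path.  The caller passes n0 in %rdx; each new quotient digit is formed with
# mulxq against n0 stored in the caller's frame.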
.type __rsaz_512_reducex,@function
.align 32
__rsaz_512_reducex:
imulq %r8,%rdx
xorq %rsi,%rsi
movl $8,%ecx
jmp .Lreduction_loopx
.align 32
.Lreduction_loopx:
movq %r8,%rbx
mulxq 0(%rbp),%rax,%r8
adcxq %rbx,%rax
adoxq %r9,%r8
mulxq 8(%rbp),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rbp),%rbx,%r10
adcxq %rbx,%r9
adoxq %r11,%r10
mulxq 24(%rbp),%rbx,%r11
adcxq %rbx,%r10
adoxq %r12,%r11
.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
movq %rdx,%rax
movq %r8,%rdx
adcxq %rbx,%r11
adoxq %r13,%r12
mulxq 128+8(%rsp),%rbx,%rdx
movq %rax,%rdx
mulxq 40(%rbp),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14
mulxq 56(%rbp),%rax,%r15
movq %rbx,%rdx
adcxq %rax,%r14
adoxq %rsi,%r15
adcxq %rsi,%r15
decl %ecx
jne .Lreduction_loopx
.byte 0xf3,0xc3
.size __rsaz_512_reducex,.-__rsaz_512_reducex
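# __rsaz_512_subtract: stores %r8-%r15 to (%rdi), then adds the masked
# two's-complement of the modulus at (%rbp).  %rcx is an all-ones/all-zeroes
# mask produced by the caller's sbbq, so the final subtraction of the modulus
# happens only when the preceding addition carried out.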
.type __rsaz_512_subtract,@function
.align 32
__rsaz_512_subtract:
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
movq 0(%rbp),%r8
movq 8(%rbp),%r9
negq %r8
notq %r9
andq %rcx,%r8
movq 16(%rbp),%r10
andq %rcx,%r9
notq %r10
movq 24(%rbp),%r11
andq %rcx,%r10
notq %r11
movq 32(%rbp),%r12
andq %rcx,%r11
notq %r12
movq 40(%rbp),%r13
andq %rcx,%r12
notq %r13
movq 48(%rbp),%r14
andq %rcx,%r13
notq %r14
movq 56(%rbp),%r15
andq %rcx,%r14
notq %r15
andq %rcx,%r15
addq (%rdi),%r8
adcq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
.byte 0xf3,0xc3
.size __rsaz_512_subtract,.-__rsaz_512_subtract
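# __rsaz_512_mul: schoolbook 512x512-bit multiply on the MUL/ADC path.
# %rsi points at the multiplicand, %rbp at the multiplier (first word already
# in %rbx); the 16-limb product is left in the caller's stack frame.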
.type __rsaz_512_mul,@function
.align 32
__rsaz_512_mul:
leaq 8(%rsp),%rdi
movq (%rsi),%rax
mulq %rbx
movq %rax,(%rdi)
movq 8(%rsi),%rax
movq %rdx,%r8
mulq %rbx
addq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9
adcq $0,%r9
mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10
mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11
mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12
mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13
mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14
mulq %rbx
addq %rax,%r14
movq (%rsi),%rax
movq %rdx,%r15
adcq $0,%r15
leaq 8(%rbp),%rbp
leaq 8(%rdi),%rdi
movl $7,%ecx
jmp .Loop_mul
.align 32
.Loop_mul:
movq (%rbp),%rbx
mulq %rbx
addq %rax,%r8
movq 8(%rsi),%rax
movq %r8,(%rdi)
movq %rdx,%r8
adcq $0,%r8
mulq %rbx
addq %rax,%r9
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9
mulq %rbx
addq %rax,%r10
movq 24(%rsi),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10
mulq %rbx
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11
mulq %rbx
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12
mulq %rbx
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13
mulq %rbx
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
leaq 8(%rbp),%rbp
adcq $0,%r14
mulq %rbx
addq %rax,%r15
movq (%rsi),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15
leaq 8(%rdi),%rdi
decl %ecx
jnz .Loop_mul
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
.byte 0xf3,0xc3
.size __rsaz_512_mul,.-__rsaz_512_mul
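# __rsaz_512_mulx: the same 512x512-bit multiply using MULX/ADCX/ADOX, with
# the multiplier words read from (%rbp) and the product left in the caller's
# stack frame.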
.type __rsaz_512_mulx,@function
.align 32
__rsaz_512_mulx:
mulxq (%rsi),%rbx,%r8
movq $-6,%rcx
mulxq 8(%rsi),%rax,%r9
movq %rbx,8(%rsp)
mulxq 16(%rsi),%rbx,%r10
adcq %rax,%r8
mulxq 24(%rsi),%rax,%r11
adcq %rbx,%r9
mulxq 32(%rsi),%rbx,%r12
adcq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcq %rbx,%r11
mulxq 48(%rsi),%rbx,%r14
adcq %rax,%r12
mulxq 56(%rsi),%rax,%r15
movq 8(%rbp),%rdx
adcq %rbx,%r13
adcq %rax,%r14
adcq $0,%r15
xorq %rdi,%rdi
jmp .Loop_mulx
.align 32
.Loop_mulx:
movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8
mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10
mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
mulxq 48(%rsi),%rax,%r14
adcxq %rax,%r13
adoxq %r15,%r14
mulxq 56(%rsi),%rax,%r15
movq 64(%rbp,%rcx,8),%rdx
movq %rbx,8+64-8(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15
incq %rcx
jnz .Loop_mulx
movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8
.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
adcxq %rax,%r8
adoxq %r10,%r9
.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
adcxq %rax,%r9
adoxq %r11,%r10
mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15
movq %rbx,8+64-8(%rsp)
movq %r8,8+64(%rsp)
movq %r9,8+64+8(%rsp)
movq %r10,8+64+16(%rsp)
movq %r11,8+64+24(%rsp)
movq %r12,8+64+32(%rsp)
movq %r13,8+64+40(%rsp)
movq %r14,8+64+48(%rsp)
movq %r15,8+64+56(%rsp)
.byte 0xf3,0xc3
.size __rsaz_512_mulx,.-__rsaz_512_mulx
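# rsaz_512_scatter4: store the 8 limbs at (%rsi) into the interleaved table at
# (%rdi), entry %rdx, with 128 bytes between consecutive limbs.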
.globl rsaz_512_scatter4
.type rsaz_512_scatter4,@function
.align 16
rsaz_512_scatter4:
leaq (%rdi,%rdx,8),%rdi
movl $8,%r9d
jmp .Loop_scatter
.align 16
.Loop_scatter:
movq (%rsi),%rax
leaq 8(%rsi),%rsi
movq %rax,(%rdi)
leaq 128(%rdi),%rdi
decl %r9d
jnz .Loop_scatter
.byte 0xf3,0xc3
.size rsaz_512_scatter4,.-rsaz_512_scatter4
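# rsaz_512_gather4: constant-time gather of entry %edx from the interleaved
# table at (%rsi) into (%rdi).  Every 128-byte row is read in full and the
# wanted qwords are selected with SSE compare masks, so the memory access
# pattern does not depend on the index.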
.globl rsaz_512_gather4
.type rsaz_512_gather4,@function
.align 16
rsaz_512_gather4:
movd %edx,%xmm8
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0
pshufd $0,%xmm8,%xmm8
movdqa %xmm1,%xmm7
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm8,%xmm0
movdqa %xmm7,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm8,%xmm1
movdqa %xmm7,%xmm4
paddd %xmm2,%xmm3
pcmpeqd %xmm8,%xmm2
movdqa %xmm7,%xmm5
paddd %xmm3,%xmm4
pcmpeqd %xmm8,%xmm3
movdqa %xmm7,%xmm6
paddd %xmm4,%xmm5
pcmpeqd %xmm8,%xmm4
paddd %xmm5,%xmm6
pcmpeqd %xmm8,%xmm5
paddd %xmm6,%xmm7
pcmpeqd %xmm8,%xmm6
pcmpeqd %xmm8,%xmm7
movl $8,%r9d
jmp .Loop_gather
.align 16
.Loop_gather:
movdqa 0(%rsi),%xmm8
movdqa 16(%rsi),%xmm9
movdqa 32(%rsi),%xmm10
movdqa 48(%rsi),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rsi),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rsi),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rsi),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rsi),%xmm15
leaq 128(%rsi),%rsi
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
movq %xmm8,(%rdi)
leaq 8(%rdi),%rdi
decl %r9d
jnz .Loop_gather
.byte 0xf3,0xc3
.LSEH_end_rsaz_512_gather4:
.size rsaz_512_gather4,.-rsaz_512_gather4
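# .Linc: increment constants used to build the 16 entry-selection masks for
# the constant-time gather code above.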
.align 64
.Linc:
.long 0,0, 1,1
.long 2,2, 2,2
.section .note.GNU-stack,"",%progbits