Import LibreSSL v2.4.2 to vendor branch
[dragonfly.git] / crypto / libressl / crypto / aes / vpaes-macosx-x86_64.s
blob 36b00dddccd3c4cd24c3ecec2002d6581c289f24
1 .text
19 .p2align 4
20 _vpaes_encrypt_core:
21 movq %rdx,%r9
22 movq $16,%r11
23 movl 240(%rdx),%eax
24 movdqa %xmm9,%xmm1
25 movdqa L$k_ipt(%rip),%xmm2
26 pandn %xmm0,%xmm1
27 movdqu (%r9),%xmm5
28 psrld $4,%xmm1
29 pand %xmm9,%xmm0
30 .byte 102,15,56,0,208
31 movdqa L$k_ipt+16(%rip),%xmm0
32 .byte 102,15,56,0,193
33 pxor %xmm5,%xmm2
34 pxor %xmm2,%xmm0
35 addq $16,%r9
36 leaq L$k_mc_backward(%rip),%r10
37 jmp L$enc_entry
39 .p2align 4
40 L$enc_loop:
42 movdqa %xmm13,%xmm4
43 .byte 102,15,56,0,226
44 pxor %xmm5,%xmm4
45 movdqa %xmm12,%xmm0
46 .byte 102,15,56,0,195
47 pxor %xmm4,%xmm0
48 movdqa %xmm15,%xmm5
49 .byte 102,15,56,0,234
50 movdqa -64(%r11,%r10,1),%xmm1
51 movdqa %xmm14,%xmm2
52 .byte 102,15,56,0,211
53 pxor %xmm5,%xmm2
54 movdqa (%r11,%r10,1),%xmm4
55 movdqa %xmm0,%xmm3
56 .byte 102,15,56,0,193
57 addq $16,%r9
58 pxor %xmm2,%xmm0
59 .byte 102,15,56,0,220
60 addq $16,%r11
61 pxor %xmm0,%xmm3
62 .byte 102,15,56,0,193
63 andq $48,%r11
64 pxor %xmm3,%xmm0
65 subq $1,%rax
67 L$enc_entry:
69 movdqa %xmm9,%xmm1
70 pandn %xmm0,%xmm1
71 psrld $4,%xmm1
72 pand %xmm9,%xmm0
73 movdqa %xmm11,%xmm5
74 .byte 102,15,56,0,232
75 pxor %xmm1,%xmm0
76 movdqa %xmm10,%xmm3
77 .byte 102,15,56,0,217
78 pxor %xmm5,%xmm3
79 movdqa %xmm10,%xmm4
80 .byte 102,15,56,0,224
81 pxor %xmm5,%xmm4
82 movdqa %xmm10,%xmm2
83 .byte 102,15,56,0,211
84 pxor %xmm0,%xmm2
85 movdqa %xmm10,%xmm3
86 movdqu (%r9),%xmm5
87 .byte 102,15,56,0,220
88 pxor %xmm1,%xmm3
89 jnz L$enc_loop
92 movdqa -96(%r10),%xmm4
93 movdqa -80(%r10),%xmm0
94 .byte 102,15,56,0,226
95 pxor %xmm5,%xmm4
96 .byte 102,15,56,0,195
97 movdqa 64(%r11,%r10,1),%xmm1
98 pxor %xmm4,%xmm0
99 .byte 102,15,56,0,193
100 .byte 0xf3,0xc3
109 .p2align 4
110 _vpaes_decrypt_core:
111 movq %rdx,%r9
112 movl 240(%rdx),%eax
113 movdqa %xmm9,%xmm1
114 movdqa L$k_dipt(%rip),%xmm2
115 pandn %xmm0,%xmm1
116 movq %rax,%r11
117 psrld $4,%xmm1
118 movdqu (%r9),%xmm5
119 shlq $4,%r11
120 pand %xmm9,%xmm0
121 .byte 102,15,56,0,208
122 movdqa L$k_dipt+16(%rip),%xmm0
123 xorq $48,%r11
124 leaq L$k_dsbd(%rip),%r10
125 .byte 102,15,56,0,193
126 andq $48,%r11
127 pxor %xmm5,%xmm2
128 movdqa L$k_mc_forward+48(%rip),%xmm5
129 pxor %xmm2,%xmm0
130 addq $16,%r9
131 addq %r10,%r11
132 jmp L$dec_entry
134 .p2align 4
135 L$dec_loop:
139 movdqa -32(%r10),%xmm4
140 .byte 102,15,56,0,226
141 pxor %xmm0,%xmm4
142 movdqa -16(%r10),%xmm0
143 .byte 102,15,56,0,195
144 pxor %xmm4,%xmm0
145 addq $16,%r9
147 .byte 102,15,56,0,197
148 movdqa 0(%r10),%xmm4
149 .byte 102,15,56,0,226
150 pxor %xmm0,%xmm4
151 movdqa 16(%r10),%xmm0
152 .byte 102,15,56,0,195
153 pxor %xmm4,%xmm0
154 subq $1,%rax
156 .byte 102,15,56,0,197
157 movdqa 32(%r10),%xmm4
158 .byte 102,15,56,0,226
159 pxor %xmm0,%xmm4
160 movdqa 48(%r10),%xmm0
161 .byte 102,15,56,0,195
162 pxor %xmm4,%xmm0
164 .byte 102,15,56,0,197
165 movdqa 64(%r10),%xmm4
166 .byte 102,15,56,0,226
167 pxor %xmm0,%xmm4
168 movdqa 80(%r10),%xmm0
169 .byte 102,15,56,0,195
170 pxor %xmm4,%xmm0
172 .byte 102,15,58,15,237,12
174 L$dec_entry:
176 movdqa %xmm9,%xmm1
177 pandn %xmm0,%xmm1
178 psrld $4,%xmm1
179 pand %xmm9,%xmm0
180 movdqa %xmm11,%xmm2
181 .byte 102,15,56,0,208
182 pxor %xmm1,%xmm0
183 movdqa %xmm10,%xmm3
184 .byte 102,15,56,0,217
185 pxor %xmm2,%xmm3
186 movdqa %xmm10,%xmm4
187 .byte 102,15,56,0,224
188 pxor %xmm2,%xmm4
189 movdqa %xmm10,%xmm2
190 .byte 102,15,56,0,211
191 pxor %xmm0,%xmm2
192 movdqa %xmm10,%xmm3
193 .byte 102,15,56,0,220
194 pxor %xmm1,%xmm3
195 movdqu (%r9),%xmm0
196 jnz L$dec_loop
199 movdqa 96(%r10),%xmm4
200 .byte 102,15,56,0,226
201 pxor %xmm0,%xmm4
202 movdqa 112(%r10),%xmm0
203 movdqa -352(%r11),%xmm2
204 .byte 102,15,56,0,195
205 pxor %xmm4,%xmm0
206 .byte 102,15,56,0,194
207 .byte 0xf3,0xc3
216 .p2align 4
217 _vpaes_schedule_core:
223 call _vpaes_preheat
224 movdqa L$k_rcon(%rip),%xmm8
225 movdqu (%rdi),%xmm0
228 movdqa %xmm0,%xmm3
229 leaq L$k_ipt(%rip),%r11
230 call _vpaes_schedule_transform
231 movdqa %xmm0,%xmm7
233 leaq L$k_sr(%rip),%r10
234 testq %rcx,%rcx
235 jnz L$schedule_am_decrypting
238 movdqu %xmm0,(%rdx)
239 jmp L$schedule_go
241 L$schedule_am_decrypting:
243 movdqa (%r8,%r10,1),%xmm1
244 .byte 102,15,56,0,217
245 movdqu %xmm3,(%rdx)
246 xorq $48,%r8
248 L$schedule_go:
249 cmpl $192,%esi
250 ja L$schedule_256
251 je L$schedule_192
262 L$schedule_128:
263 movl $10,%esi
265 L$oop_schedule_128:
266 call _vpaes_schedule_round
267 decq %rsi
268 jz L$schedule_mangle_last
269 call _vpaes_schedule_mangle
270 jmp L$oop_schedule_128
287 .p2align 4
288 L$schedule_192:
289 movdqu 8(%rdi),%xmm0
290 call _vpaes_schedule_transform
291 movdqa %xmm0,%xmm6
292 pxor %xmm4,%xmm4
293 movhlps %xmm4,%xmm6
294 movl $4,%esi
296 L$oop_schedule_192:
297 call _vpaes_schedule_round
298 .byte 102,15,58,15,198,8
299 call _vpaes_schedule_mangle
300 call _vpaes_schedule_192_smear
301 call _vpaes_schedule_mangle
302 call _vpaes_schedule_round
303 decq %rsi
304 jz L$schedule_mangle_last
305 call _vpaes_schedule_mangle
306 call _vpaes_schedule_192_smear
307 jmp L$oop_schedule_192
319 .p2align 4
320 L$schedule_256:
321 movdqu 16(%rdi),%xmm0
322 call _vpaes_schedule_transform
323 movl $7,%esi
325 L$oop_schedule_256:
326 call _vpaes_schedule_mangle
327 movdqa %xmm0,%xmm6
330 call _vpaes_schedule_round
331 decq %rsi
332 jz L$schedule_mangle_last
333 call _vpaes_schedule_mangle
336 pshufd $255,%xmm0,%xmm0
337 movdqa %xmm7,%xmm5
338 movdqa %xmm6,%xmm7
339 call _vpaes_schedule_low_round
340 movdqa %xmm5,%xmm7
342 jmp L$oop_schedule_256
355 .p2align 4
356 L$schedule_mangle_last:
358 leaq L$k_deskew(%rip),%r11
359 testq %rcx,%rcx
360 jnz L$schedule_mangle_last_dec
363 movdqa (%r8,%r10,1),%xmm1
364 .byte 102,15,56,0,193
365 leaq L$k_opt(%rip),%r11
366 addq $32,%rdx
368 L$schedule_mangle_last_dec:
369 addq $-16,%rdx
370 pxor L$k_s63(%rip),%xmm0
371 call _vpaes_schedule_transform
372 movdqu %xmm0,(%rdx)
375 pxor %xmm0,%xmm0
376 pxor %xmm1,%xmm1
377 pxor %xmm2,%xmm2
378 pxor %xmm3,%xmm3
379 pxor %xmm4,%xmm4
380 pxor %xmm5,%xmm5
381 pxor %xmm6,%xmm6
382 pxor %xmm7,%xmm7
383 .byte 0xf3,0xc3
401 .p2align 4
402 _vpaes_schedule_192_smear:
403 pshufd $128,%xmm6,%xmm0
404 pxor %xmm0,%xmm6
405 pshufd $254,%xmm7,%xmm0
406 pxor %xmm0,%xmm6
407 movdqa %xmm6,%xmm0
408 pxor %xmm1,%xmm1
409 movhlps %xmm1,%xmm6
410 .byte 0xf3,0xc3
432 .p2align 4
433 _vpaes_schedule_round:
435 pxor %xmm1,%xmm1
436 .byte 102,65,15,58,15,200,15
437 .byte 102,69,15,58,15,192,15
438 pxor %xmm1,%xmm7
441 pshufd $255,%xmm0,%xmm0
442 .byte 102,15,58,15,192,1
447 _vpaes_schedule_low_round:
449 movdqa %xmm7,%xmm1
450 pslldq $4,%xmm7
451 pxor %xmm1,%xmm7
452 movdqa %xmm7,%xmm1
453 pslldq $8,%xmm7
454 pxor %xmm1,%xmm7
455 pxor L$k_s63(%rip),%xmm7
458 movdqa %xmm9,%xmm1
459 pandn %xmm0,%xmm1
460 psrld $4,%xmm1
461 pand %xmm9,%xmm0
462 movdqa %xmm11,%xmm2
463 .byte 102,15,56,0,208
464 pxor %xmm1,%xmm0
465 movdqa %xmm10,%xmm3
466 .byte 102,15,56,0,217
467 pxor %xmm2,%xmm3
468 movdqa %xmm10,%xmm4
469 .byte 102,15,56,0,224
470 pxor %xmm2,%xmm4
471 movdqa %xmm10,%xmm2
472 .byte 102,15,56,0,211
473 pxor %xmm0,%xmm2
474 movdqa %xmm10,%xmm3
475 .byte 102,15,56,0,220
476 pxor %xmm1,%xmm3
477 movdqa %xmm13,%xmm4
478 .byte 102,15,56,0,226
479 movdqa %xmm12,%xmm0
480 .byte 102,15,56,0,195
481 pxor %xmm4,%xmm0
484 pxor %xmm7,%xmm0
485 movdqa %xmm0,%xmm7
486 .byte 0xf3,0xc3
499 .p2align 4
500 _vpaes_schedule_transform:
501 movdqa %xmm9,%xmm1
502 pandn %xmm0,%xmm1
503 psrld $4,%xmm1
504 pand %xmm9,%xmm0
505 movdqa (%r11),%xmm2
506 .byte 102,15,56,0,208
507 movdqa 16(%r11),%xmm0
508 .byte 102,15,56,0,193
509 pxor %xmm2,%xmm0
510 .byte 0xf3,0xc3
537 .p2align 4
538 _vpaes_schedule_mangle:
539 movdqa %xmm0,%xmm4
540 movdqa L$k_mc_forward(%rip),%xmm5
541 testq %rcx,%rcx
542 jnz L$schedule_mangle_dec
545 addq $16,%rdx
546 pxor L$k_s63(%rip),%xmm4
547 .byte 102,15,56,0,229
548 movdqa %xmm4,%xmm3
549 .byte 102,15,56,0,229
550 pxor %xmm4,%xmm3
551 .byte 102,15,56,0,229
552 pxor %xmm4,%xmm3
554 jmp L$schedule_mangle_both
555 .p2align 4
556 L$schedule_mangle_dec:
558 leaq L$k_dksd(%rip),%r11
559 movdqa %xmm9,%xmm1
560 pandn %xmm4,%xmm1
561 psrld $4,%xmm1
562 pand %xmm9,%xmm4
564 movdqa 0(%r11),%xmm2
565 .byte 102,15,56,0,212
566 movdqa 16(%r11),%xmm3
567 .byte 102,15,56,0,217
568 pxor %xmm2,%xmm3
569 .byte 102,15,56,0,221
571 movdqa 32(%r11),%xmm2
572 .byte 102,15,56,0,212
573 pxor %xmm3,%xmm2
574 movdqa 48(%r11),%xmm3
575 .byte 102,15,56,0,217
576 pxor %xmm2,%xmm3
577 .byte 102,15,56,0,221
579 movdqa 64(%r11),%xmm2
580 .byte 102,15,56,0,212
581 pxor %xmm3,%xmm2
582 movdqa 80(%r11),%xmm3
583 .byte 102,15,56,0,217
584 pxor %xmm2,%xmm3
585 .byte 102,15,56,0,221
587 movdqa 96(%r11),%xmm2
588 .byte 102,15,56,0,212
589 pxor %xmm3,%xmm2
590 movdqa 112(%r11),%xmm3
591 .byte 102,15,56,0,217
592 pxor %xmm2,%xmm3
594 addq $-16,%rdx
596 L$schedule_mangle_both:
597 movdqa (%r8,%r10,1),%xmm1
598 .byte 102,15,56,0,217
599 addq $-16,%r8
600 andq $48,%r8
601 movdqu %xmm3,(%rdx)
602 .byte 0xf3,0xc3
608 .globl _vpaes_set_encrypt_key
610 .p2align 4
611 _vpaes_set_encrypt_key:
612 movl %esi,%eax
613 shrl $5,%eax
614 addl $5,%eax
615 movl %eax,240(%rdx)
617 movl $0,%ecx
618 movl $48,%r8d
619 call _vpaes_schedule_core
620 xorl %eax,%eax
621 .byte 0xf3,0xc3
624 .globl _vpaes_set_decrypt_key
626 .p2align 4
627 _vpaes_set_decrypt_key:
628 movl %esi,%eax
629 shrl $5,%eax
630 addl $5,%eax
631 movl %eax,240(%rdx)
632 shll $4,%eax
633 leaq 16(%rdx,%rax,1),%rdx
635 movl $1,%ecx
636 movl %esi,%r8d
637 shrl $1,%r8d
638 andl $32,%r8d
639 xorl $32,%r8d
640 call _vpaes_schedule_core
641 xorl %eax,%eax
642 .byte 0xf3,0xc3
645 .globl _vpaes_encrypt
647 .p2align 4
648 _vpaes_encrypt:
649 movdqu (%rdi),%xmm0
650 call _vpaes_preheat
651 call _vpaes_encrypt_core
652 movdqu %xmm0,(%rsi)
653 .byte 0xf3,0xc3
656 .globl _vpaes_decrypt
658 .p2align 4
659 _vpaes_decrypt:
660 movdqu (%rdi),%xmm0
661 call _vpaes_preheat
662 call _vpaes_decrypt_core
663 movdqu %xmm0,(%rsi)
664 .byte 0xf3,0xc3
666 .globl _vpaes_cbc_encrypt
668 .p2align 4
669 _vpaes_cbc_encrypt:
670 xchgq %rcx,%rdx
671 subq $16,%rcx
672 jc L$cbc_abort
673 movdqu (%r8),%xmm6
674 subq %rdi,%rsi
675 call _vpaes_preheat
676 cmpl $0,%r9d
677 je L$cbc_dec_loop
678 jmp L$cbc_enc_loop
679 .p2align 4
680 L$cbc_enc_loop:
681 movdqu (%rdi),%xmm0
682 pxor %xmm6,%xmm0
683 call _vpaes_encrypt_core
684 movdqa %xmm0,%xmm6
685 movdqu %xmm0,(%rsi,%rdi,1)
686 leaq 16(%rdi),%rdi
687 subq $16,%rcx
688 jnc L$cbc_enc_loop
689 jmp L$cbc_done
690 .p2align 4
691 L$cbc_dec_loop:
692 movdqu (%rdi),%xmm0
693 movdqa %xmm0,%xmm7
694 call _vpaes_decrypt_core
695 pxor %xmm6,%xmm0
696 movdqa %xmm7,%xmm6
697 movdqu %xmm0,(%rsi,%rdi,1)
698 leaq 16(%rdi),%rdi
699 subq $16,%rcx
700 jnc L$cbc_dec_loop
701 L$cbc_done:
702 movdqu %xmm6,(%r8)
703 L$cbc_abort:
704 .byte 0xf3,0xc3
713 .p2align 4
714 _vpaes_preheat:
715 leaq L$k_s0F(%rip),%r10
716 movdqa -32(%r10),%xmm10
717 movdqa -16(%r10),%xmm11
718 movdqa 0(%r10),%xmm9
719 movdqa 48(%r10),%xmm13
720 movdqa 64(%r10),%xmm12
721 movdqa 80(%r10),%xmm15
722 movdqa 96(%r10),%xmm14
723 .byte 0xf3,0xc3
731 .p2align 6
732 _vpaes_consts:
733 L$k_inv:
734 .quad 0x0E05060F0D080180, 0x040703090A0B0C02
735 .quad 0x01040A060F0B0780, 0x030D0E0C02050809
737 L$k_s0F:
738 .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
740 L$k_ipt:
741 .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
742 .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
744 L$k_sb1:
745 .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
746 .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
747 L$k_sb2:
748 .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
749 .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
750 L$k_sbo:
751 .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
752 .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
754 L$k_mc_forward:
755 .quad 0x0407060500030201, 0x0C0F0E0D080B0A09
756 .quad 0x080B0A0904070605, 0x000302010C0F0E0D
757 .quad 0x0C0F0E0D080B0A09, 0x0407060500030201
758 .quad 0x000302010C0F0E0D, 0x080B0A0904070605
760 L$k_mc_backward:
761 .quad 0x0605040702010003, 0x0E0D0C0F0A09080B
762 .quad 0x020100030E0D0C0F, 0x0A09080B06050407
763 .quad 0x0E0D0C0F0A09080B, 0x0605040702010003
764 .quad 0x0A09080B06050407, 0x020100030E0D0C0F
766 L$k_sr:
767 .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
768 .quad 0x030E09040F0A0500, 0x0B06010C07020D08
769 .quad 0x0F060D040B020900, 0x070E050C030A0108
770 .quad 0x0B0E0104070A0D00, 0x0306090C0F020508
772 L$k_rcon:
773 .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
775 L$k_s63:
776 .quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
778 L$k_opt:
779 .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
780 .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
782 L$k_deskew:
783 .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
784 .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
790 L$k_dksd:
791 .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
792 .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
793 L$k_dksb:
794 .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
795 .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
796 L$k_dkse:
797 .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
798 .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
799 L$k_dks9:
800 .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
801 .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
807 L$k_dipt:
808 .quad 0x0F505B040B545F00, 0x154A411E114E451A
809 .quad 0x86E383E660056500, 0x12771772F491F194
811 L$k_dsb9:
812 .quad 0x851C03539A86D600, 0xCAD51F504F994CC9
813 .quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
814 L$k_dsbd:
815 .quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
816 .quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
817 L$k_dsbb:
818 .quad 0xD022649296B44200, 0x602646F6B0F2D404
819 .quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
820 L$k_dsbe:
821 .quad 0x46F2929626D4D000, 0x2242600464B4F6B0
822 .quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
823 L$k_dsbo:
824 .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
825 .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
826 .byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
827 .p2align 6