updated licenses
[gnutls.git] / lib / accelerated / x86 / macosx / appro-aes-x86-64-macosx.s
blob92b78ef76a64230f74986cda2b2ac94ff327788d
1 # Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
2 # All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions
6 # are met:
7 #
8 # * Redistributions of source code must retain copyright notices,
9 # this list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following
13 # disclaimer in the documentation and/or other materials
14 # provided with the distribution.
16 # * Neither the name of the Andy Polyakov nor the names of its
17 # copyright holder and contributors may be used to endorse or
18 # promote products derived from this software without specific
19 # prior written permission.
21 # ALTERNATIVELY, provided that this notice is retained in full, this
22 # product may be distributed under the terms of the GNU General Public
23 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
24 # those given above.
26 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
27 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 # *** This file is auto-generated ***
40 .text
# void aesni_encrypt(const void *in, void *out, const AES_KEY *key)
# SysV AMD64, Mach-O (leading underscore, L$ local labels).
# In:  %rdi = in (one 16-byte block), %rsi = out, %rdx = key schedule;
#      240(%rdx) holds the round count.
# The .byte sequences hand-encode AES-NI for old assemblers:
#   102,15,56,220,209 = aesenc     %xmm1,%xmm2
#   102,15,56,221,209 = aesenclast %xmm1,%xmm2
# 0xf3,0xc3 encodes "rep ret".
# Clobbers: %eax, %rdx, %xmm0-%xmm2, flags.
41 .globl _aesni_encrypt
43 .p2align 4
44 _aesni_encrypt:
45 movups (%rdi),%xmm2
46 movl 240(%rdx),%eax
47 movups (%rdx),%xmm0
48 movups 16(%rdx),%xmm1
49 leaq 32(%rdx),%rdx
50 xorps %xmm0,%xmm2
# One AES round per iteration; %eax = rounds remaining, %rdx walks the schedule.
51 L$oop_enc1_1:
52 .byte 102,15,56,220,209
53 decl %eax
54 movups (%rdx),%xmm1
55 leaq 16(%rdx),%rdx
56 jnz L$oop_enc1_1
57 .byte 102,15,56,221,209
58 movups %xmm2,(%rsi)
59 .byte 0xf3,0xc3
# void aesni_decrypt(const void *in, void *out, const AES_KEY *key)
# Mirror of _aesni_encrypt using the decrypt opcodes:
#   102,15,56,222,209 = aesdec     %xmm1,%xmm2
#   102,15,56,223,209 = aesdeclast %xmm1,%xmm2
# In:  %rdi = in, %rsi = out, %rdx = key schedule (240(%rdx) = rounds).
# Clobbers: %eax, %rdx, %xmm0-%xmm2, flags.
62 .globl _aesni_decrypt
64 .p2align 4
65 _aesni_decrypt:
66 movups (%rdi),%xmm2
67 movl 240(%rdx),%eax
68 movups (%rdx),%xmm0
69 movups 16(%rdx),%xmm1
70 leaq 32(%rdx),%rdx
71 xorps %xmm0,%xmm2
# One AES round per iteration until the counter hits zero.
72 L$oop_dec1_2:
73 .byte 102,15,56,222,209
74 decl %eax
75 movups (%rdx),%xmm1
76 leaq 16(%rdx),%rdx
77 jnz L$oop_dec1_2
78 .byte 102,15,56,223,209
79 movups %xmm2,(%rsi)
80 .byte 0xf3,0xc3
# Internal helper: encrypt three blocks (%xmm2..%xmm4) in parallel.
# In:  %rcx = key schedule, %eax = round count (shrl $1 halves it: the
#      loop below issues TWO rounds per iteration, keys in %xmm1 then
#      %xmm0, interleaved across the blocks to hide aesenc latency).
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.  Not a public entry point.
83 .p2align 4
84 _aesni_encrypt3:
85 movups (%rcx),%xmm0
86 shrl $1,%eax
87 movups 16(%rcx),%xmm1
88 leaq 32(%rcx),%rcx
# Whitening: xor round key 0 into all three blocks.
89 xorps %xmm0,%xmm2
90 xorps %xmm0,%xmm3
91 xorps %xmm0,%xmm4
92 movups (%rcx),%xmm0
94 L$enc_loop3:
95 .byte 102,15,56,220,209
96 .byte 102,15,56,220,217
97 decl %eax
98 .byte 102,15,56,220,225
99 movups 16(%rcx),%xmm1
100 .byte 102,15,56,220,208
101 .byte 102,15,56,220,216
102 leaq 32(%rcx),%rcx
103 .byte 102,15,56,220,224
104 movups (%rcx),%xmm0
105 jnz L$enc_loop3
# Final round: aesenc then aesenclast on each block.
107 .byte 102,15,56,220,209
108 .byte 102,15,56,220,217
109 .byte 102,15,56,220,225
110 .byte 102,15,56,221,208
111 .byte 102,15,56,221,216
112 .byte 102,15,56,221,224
113 .byte 0xf3,0xc3
# Internal helper: decrypt three blocks (%xmm2..%xmm4) in parallel.
# Structure identical to _aesni_encrypt3 but with aesdec (102,15,56,222)
# and aesdeclast (102,15,56,223).  %rcx = key schedule, %eax = rounds
# (halved; two rounds per loop iteration).  Clobbers %eax,%rcx,%xmm0,%xmm1.
116 .p2align 4
117 _aesni_decrypt3:
118 movups (%rcx),%xmm0
119 shrl $1,%eax
120 movups 16(%rcx),%xmm1
121 leaq 32(%rcx),%rcx
122 xorps %xmm0,%xmm2
123 xorps %xmm0,%xmm3
124 xorps %xmm0,%xmm4
125 movups (%rcx),%xmm0
127 L$dec_loop3:
128 .byte 102,15,56,222,209
129 .byte 102,15,56,222,217
130 decl %eax
131 .byte 102,15,56,222,225
132 movups 16(%rcx),%xmm1
133 .byte 102,15,56,222,208
134 .byte 102,15,56,222,216
135 leaq 32(%rcx),%rcx
136 .byte 102,15,56,222,224
137 movups (%rcx),%xmm0
138 jnz L$dec_loop3
# Final round pair, ending in aesdeclast per block.
140 .byte 102,15,56,222,209
141 .byte 102,15,56,222,217
142 .byte 102,15,56,222,225
143 .byte 102,15,56,223,208
144 .byte 102,15,56,223,216
145 .byte 102,15,56,223,224
146 .byte 0xf3,0xc3
# Internal helper: encrypt four blocks (%xmm2..%xmm5) in parallel.
# In:  %rcx = key schedule, %eax = round count (halved; two rounds per
#      loop iteration).  Clobbers %eax, %rcx, %xmm0, %xmm1, flags.
149 .p2align 4
150 _aesni_encrypt4:
151 movups (%rcx),%xmm0
152 shrl $1,%eax
153 movups 16(%rcx),%xmm1
154 leaq 32(%rcx),%rcx
# Whitening with round key 0.
155 xorps %xmm0,%xmm2
156 xorps %xmm0,%xmm3
157 xorps %xmm0,%xmm4
158 xorps %xmm0,%xmm5
159 movups (%rcx),%xmm0
161 L$enc_loop4:
162 .byte 102,15,56,220,209
163 .byte 102,15,56,220,217
164 decl %eax
165 .byte 102,15,56,220,225
166 .byte 102,15,56,220,233
167 movups 16(%rcx),%xmm1
168 .byte 102,15,56,220,208
169 .byte 102,15,56,220,216
170 leaq 32(%rcx),%rcx
171 .byte 102,15,56,220,224
172 .byte 102,15,56,220,232
173 movups (%rcx),%xmm0
174 jnz L$enc_loop4
# Final round: aesenc then aesenclast on each of the four blocks.
176 .byte 102,15,56,220,209
177 .byte 102,15,56,220,217
178 .byte 102,15,56,220,225
179 .byte 102,15,56,220,233
180 .byte 102,15,56,221,208
181 .byte 102,15,56,221,216
182 .byte 102,15,56,221,224
183 .byte 102,15,56,221,232
184 .byte 0xf3,0xc3
# Internal helper: decrypt four blocks (%xmm2..%xmm5) in parallel.
# Mirror of _aesni_encrypt4 with aesdec/aesdeclast opcodes.
# In:  %rcx = key schedule, %eax = round count (halved).
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
187 .p2align 4
188 _aesni_decrypt4:
189 movups (%rcx),%xmm0
190 shrl $1,%eax
191 movups 16(%rcx),%xmm1
192 leaq 32(%rcx),%rcx
193 xorps %xmm0,%xmm2
194 xorps %xmm0,%xmm3
195 xorps %xmm0,%xmm4
196 xorps %xmm0,%xmm5
197 movups (%rcx),%xmm0
199 L$dec_loop4:
200 .byte 102,15,56,222,209
201 .byte 102,15,56,222,217
202 decl %eax
203 .byte 102,15,56,222,225
204 .byte 102,15,56,222,233
205 movups 16(%rcx),%xmm1
206 .byte 102,15,56,222,208
207 .byte 102,15,56,222,216
208 leaq 32(%rcx),%rcx
209 .byte 102,15,56,222,224
210 .byte 102,15,56,222,232
211 movups (%rcx),%xmm0
212 jnz L$dec_loop4
# Final round pair, ending in aesdeclast per block.
214 .byte 102,15,56,222,209
215 .byte 102,15,56,222,217
216 .byte 102,15,56,222,225
217 .byte 102,15,56,222,233
218 .byte 102,15,56,223,208
219 .byte 102,15,56,223,216
220 .byte 102,15,56,223,224
221 .byte 102,15,56,223,232
222 .byte 0xf3,0xc3
# Internal helper: encrypt six blocks (%xmm2..%xmm7) in parallel.
# In:  %rcx = key schedule, %eax = round count (halved; two rounds per
#      loop iteration).  The prologue interleaves the key-0 whitening
#      with the first aesenc round, then jumps into the middle of the
#      loop (L$enc_loop6_enter) to keep the pipeline full.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
225 .p2align 4
226 _aesni_encrypt6:
227 movups (%rcx),%xmm0
228 shrl $1,%eax
229 movups 16(%rcx),%xmm1
230 leaq 32(%rcx),%rcx
231 xorps %xmm0,%xmm2
232 pxor %xmm0,%xmm3
233 .byte 102,15,56,220,209
234 pxor %xmm0,%xmm4
235 .byte 102,15,56,220,217
236 pxor %xmm0,%xmm5
237 .byte 102,15,56,220,225
238 pxor %xmm0,%xmm6
239 .byte 102,15,56,220,233
240 pxor %xmm0,%xmm7
241 decl %eax
242 .byte 102,15,56,220,241
243 movups (%rcx),%xmm0
244 .byte 102,15,56,220,249
245 jmp L$enc_loop6_enter
246 .p2align 4
247 L$enc_loop6:
248 .byte 102,15,56,220,209
249 .byte 102,15,56,220,217
250 decl %eax
251 .byte 102,15,56,220,225
252 .byte 102,15,56,220,233
253 .byte 102,15,56,220,241
254 .byte 102,15,56,220,249
255 L$enc_loop6_enter:
256 movups 16(%rcx),%xmm1
257 .byte 102,15,56,220,208
258 .byte 102,15,56,220,216
259 leaq 32(%rcx),%rcx
260 .byte 102,15,56,220,224
261 .byte 102,15,56,220,232
262 .byte 102,15,56,220,240
263 .byte 102,15,56,220,248
264 movups (%rcx),%xmm0
265 jnz L$enc_loop6
# Final round: aesenc then aesenclast on each of the six blocks.
267 .byte 102,15,56,220,209
268 .byte 102,15,56,220,217
269 .byte 102,15,56,220,225
270 .byte 102,15,56,220,233
271 .byte 102,15,56,220,241
272 .byte 102,15,56,220,249
273 .byte 102,15,56,221,208
274 .byte 102,15,56,221,216
275 .byte 102,15,56,221,224
276 .byte 102,15,56,221,232
277 .byte 102,15,56,221,240
278 .byte 102,15,56,221,248
279 .byte 0xf3,0xc3
# Internal helper: decrypt six blocks (%xmm2..%xmm7) in parallel.
# Mirror of _aesni_encrypt6 with aesdec/aesdeclast; same entry-into-
# loop-middle trick via L$dec_loop6_enter.
# In:  %rcx = key schedule, %eax = round count (halved).
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
282 .p2align 4
283 _aesni_decrypt6:
284 movups (%rcx),%xmm0
285 shrl $1,%eax
286 movups 16(%rcx),%xmm1
287 leaq 32(%rcx),%rcx
288 xorps %xmm0,%xmm2
289 pxor %xmm0,%xmm3
290 .byte 102,15,56,222,209
291 pxor %xmm0,%xmm4
292 .byte 102,15,56,222,217
293 pxor %xmm0,%xmm5
294 .byte 102,15,56,222,225
295 pxor %xmm0,%xmm6
296 .byte 102,15,56,222,233
297 pxor %xmm0,%xmm7
298 decl %eax
299 .byte 102,15,56,222,241
300 movups (%rcx),%xmm0
301 .byte 102,15,56,222,249
302 jmp L$dec_loop6_enter
303 .p2align 4
304 L$dec_loop6:
305 .byte 102,15,56,222,209
306 .byte 102,15,56,222,217
307 decl %eax
308 .byte 102,15,56,222,225
309 .byte 102,15,56,222,233
310 .byte 102,15,56,222,241
311 .byte 102,15,56,222,249
312 L$dec_loop6_enter:
313 movups 16(%rcx),%xmm1
314 .byte 102,15,56,222,208
315 .byte 102,15,56,222,216
316 leaq 32(%rcx),%rcx
317 .byte 102,15,56,222,224
318 .byte 102,15,56,222,232
319 .byte 102,15,56,222,240
320 .byte 102,15,56,222,248
321 movups (%rcx),%xmm0
322 jnz L$dec_loop6
# Final round pair, ending in aesdeclast per block.
324 .byte 102,15,56,222,209
325 .byte 102,15,56,222,217
326 .byte 102,15,56,222,225
327 .byte 102,15,56,222,233
328 .byte 102,15,56,222,241
329 .byte 102,15,56,222,249
330 .byte 102,15,56,223,208
331 .byte 102,15,56,223,216
332 .byte 102,15,56,223,224
333 .byte 102,15,56,223,232
334 .byte 102,15,56,223,240
335 .byte 102,15,56,223,248
336 .byte 0xf3,0xc3
# Internal helper: encrypt eight blocks (%xmm2..%xmm9) in parallel.
# In:  %rcx = key schedule, %eax = round count (halved; two rounds per
#      loop iteration).  Sequences with a 102,68 prefix carry REX.R and
#      target the high registers, e.g. 102,68,15,56,220,193 =
#      aesenc %xmm1,%xmm8 and ...,201 = aesenc %xmm1,%xmm9.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
339 .p2align 4
340 _aesni_encrypt8:
341 movups (%rcx),%xmm0
342 shrl $1,%eax
343 movups 16(%rcx),%xmm1
344 leaq 32(%rcx),%rcx
# Key-0 whitening interleaved with the first aesenc round.
345 xorps %xmm0,%xmm2
346 xorps %xmm0,%xmm3
347 .byte 102,15,56,220,209
348 pxor %xmm0,%xmm4
349 .byte 102,15,56,220,217
350 pxor %xmm0,%xmm5
351 .byte 102,15,56,220,225
352 pxor %xmm0,%xmm6
353 .byte 102,15,56,220,233
354 pxor %xmm0,%xmm7
355 decl %eax
356 .byte 102,15,56,220,241
357 pxor %xmm0,%xmm8
358 .byte 102,15,56,220,249
359 pxor %xmm0,%xmm9
360 movups (%rcx),%xmm0
361 .byte 102,68,15,56,220,193
362 .byte 102,68,15,56,220,201
363 movups 16(%rcx),%xmm1
364 jmp L$enc_loop8_enter
365 .p2align 4
366 L$enc_loop8:
367 .byte 102,15,56,220,209
368 .byte 102,15,56,220,217
369 decl %eax
370 .byte 102,15,56,220,225
371 .byte 102,15,56,220,233
372 .byte 102,15,56,220,241
373 .byte 102,15,56,220,249
374 .byte 102,68,15,56,220,193
375 .byte 102,68,15,56,220,201
376 movups 16(%rcx),%xmm1
377 L$enc_loop8_enter:
378 .byte 102,15,56,220,208
379 .byte 102,15,56,220,216
380 leaq 32(%rcx),%rcx
381 .byte 102,15,56,220,224
382 .byte 102,15,56,220,232
383 .byte 102,15,56,220,240
384 .byte 102,15,56,220,248
385 .byte 102,68,15,56,220,192
386 .byte 102,68,15,56,220,200
387 movups (%rcx),%xmm0
388 jnz L$enc_loop8
# Final round: aesenc then aesenclast on each of the eight blocks.
390 .byte 102,15,56,220,209
391 .byte 102,15,56,220,217
392 .byte 102,15,56,220,225
393 .byte 102,15,56,220,233
394 .byte 102,15,56,220,241
395 .byte 102,15,56,220,249
396 .byte 102,68,15,56,220,193
397 .byte 102,68,15,56,220,201
398 .byte 102,15,56,221,208
399 .byte 102,15,56,221,216
400 .byte 102,15,56,221,224
401 .byte 102,15,56,221,232
402 .byte 102,15,56,221,240
403 .byte 102,15,56,221,248
404 .byte 102,68,15,56,221,192
405 .byte 102,68,15,56,221,200
406 .byte 0xf3,0xc3
# Internal helper: decrypt eight blocks (%xmm2..%xmm9) in parallel.
# Mirror of _aesni_encrypt8 with aesdec (102,[68,]15,56,222) and
# aesdeclast (102,[68,]15,56,223) opcodes.
# In:  %rcx = key schedule, %eax = round count (halved).
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
409 .p2align 4
410 _aesni_decrypt8:
411 movups (%rcx),%xmm0
412 shrl $1,%eax
413 movups 16(%rcx),%xmm1
414 leaq 32(%rcx),%rcx
415 xorps %xmm0,%xmm2
416 xorps %xmm0,%xmm3
417 .byte 102,15,56,222,209
418 pxor %xmm0,%xmm4
419 .byte 102,15,56,222,217
420 pxor %xmm0,%xmm5
421 .byte 102,15,56,222,225
422 pxor %xmm0,%xmm6
423 .byte 102,15,56,222,233
424 pxor %xmm0,%xmm7
425 decl %eax
426 .byte 102,15,56,222,241
427 pxor %xmm0,%xmm8
428 .byte 102,15,56,222,249
429 pxor %xmm0,%xmm9
430 movups (%rcx),%xmm0
431 .byte 102,68,15,56,222,193
432 .byte 102,68,15,56,222,201
433 movups 16(%rcx),%xmm1
434 jmp L$dec_loop8_enter
435 .p2align 4
436 L$dec_loop8:
437 .byte 102,15,56,222,209
438 .byte 102,15,56,222,217
439 decl %eax
440 .byte 102,15,56,222,225
441 .byte 102,15,56,222,233
442 .byte 102,15,56,222,241
443 .byte 102,15,56,222,249
444 .byte 102,68,15,56,222,193
445 .byte 102,68,15,56,222,201
446 movups 16(%rcx),%xmm1
447 L$dec_loop8_enter:
448 .byte 102,15,56,222,208
449 .byte 102,15,56,222,216
450 leaq 32(%rcx),%rcx
451 .byte 102,15,56,222,224
452 .byte 102,15,56,222,232
453 .byte 102,15,56,222,240
454 .byte 102,15,56,222,248
455 .byte 102,68,15,56,222,192
456 .byte 102,68,15,56,222,200
457 movups (%rcx),%xmm0
458 jnz L$dec_loop8
# Final round pair, ending in aesdeclast per block.
460 .byte 102,15,56,222,209
461 .byte 102,15,56,222,217
462 .byte 102,15,56,222,225
463 .byte 102,15,56,222,233
464 .byte 102,15,56,222,241
465 .byte 102,15,56,222,249
466 .byte 102,68,15,56,222,193
467 .byte 102,68,15,56,222,201
468 .byte 102,15,56,223,208
469 .byte 102,15,56,223,216
470 .byte 102,15,56,223,224
471 .byte 102,15,56,223,232
472 .byte 102,15,56,223,240
473 .byte 102,15,56,223,248
474 .byte 102,68,15,56,223,192
475 .byte 102,68,15,56,223,200
476 .byte 0xf3,0xc3
# void aesni_ecb_encrypt(const void *in, void *out, size_t len,
#                        const AES_KEY *key, int enc)
# SysV AMD64: %rdi = in, %rsi = out, %rdx = len in bytes (rounded DOWN
# to a multiple of 16 here), %rcx = key schedule, %r8d = enc flag
# (nonzero -> encrypt path, zero -> L$ecb_decrypt).
# Strategy: 8 blocks per iteration through _aesni_encrypt8/_aesni_decrypt8,
# then a 1..7-block tail dispatched by size.  The helpers clobber
# %rcx/%eax, so the key pointer and round count are cached in %r11/%r10d
# and restored each iteration.
478 .globl _aesni_ecb_encrypt
480 .p2align 4
481 _aesni_ecb_encrypt:
482 andq $-16,%rdx
483 jz L$ecb_ret
485 movl 240(%rcx),%eax
486 movups (%rcx),%xmm0
487 movq %rcx,%r11
488 movl %eax,%r10d
489 testl %r8d,%r8d
490 jz L$ecb_decrypt
# ---- encrypt path: prime first 8 blocks, loop writes previous batch
# while loading the next to overlap memory traffic with AES work.
492 cmpq $128,%rdx
493 jb L$ecb_enc_tail
495 movdqu (%rdi),%xmm2
496 movdqu 16(%rdi),%xmm3
497 movdqu 32(%rdi),%xmm4
498 movdqu 48(%rdi),%xmm5
499 movdqu 64(%rdi),%xmm6
500 movdqu 80(%rdi),%xmm7
501 movdqu 96(%rdi),%xmm8
502 movdqu 112(%rdi),%xmm9
503 leaq 128(%rdi),%rdi
504 subq $128,%rdx
505 jmp L$ecb_enc_loop8_enter
506 .p2align 4
507 L$ecb_enc_loop8:
508 movups %xmm2,(%rsi)
509 movq %r11,%rcx
510 movdqu (%rdi),%xmm2
511 movl %r10d,%eax
512 movups %xmm3,16(%rsi)
513 movdqu 16(%rdi),%xmm3
514 movups %xmm4,32(%rsi)
515 movdqu 32(%rdi),%xmm4
516 movups %xmm5,48(%rsi)
517 movdqu 48(%rdi),%xmm5
518 movups %xmm6,64(%rsi)
519 movdqu 64(%rdi),%xmm6
520 movups %xmm7,80(%rsi)
521 movdqu 80(%rdi),%xmm7
522 movups %xmm8,96(%rsi)
523 movdqu 96(%rdi),%xmm8
524 movups %xmm9,112(%rsi)
525 leaq 128(%rsi),%rsi
526 movdqu 112(%rdi),%xmm9
527 leaq 128(%rdi),%rdi
528 L$ecb_enc_loop8_enter:
530 call _aesni_encrypt8
532 subq $128,%rdx
533 jnc L$ecb_enc_loop8
# Flush the last full 8-block batch, then handle any tail.
535 movups %xmm2,(%rsi)
536 movq %r11,%rcx
537 movups %xmm3,16(%rsi)
538 movl %r10d,%eax
539 movups %xmm4,32(%rsi)
540 movups %xmm5,48(%rsi)
541 movups %xmm6,64(%rsi)
542 movups %xmm7,80(%rsi)
543 movups %xmm8,96(%rsi)
544 movups %xmm9,112(%rsi)
545 leaq 128(%rsi),%rsi
546 addq $128,%rdx
547 jz L$ecb_ret
# Tail dispatch on remaining length: 16/32/48/64/80/96/112 bytes.
549 L$ecb_enc_tail:
550 movups (%rdi),%xmm2
551 cmpq $32,%rdx
552 jb L$ecb_enc_one
553 movups 16(%rdi),%xmm3
554 je L$ecb_enc_two
555 movups 32(%rdi),%xmm4
556 cmpq $64,%rdx
557 jb L$ecb_enc_three
558 movups 48(%rdi),%xmm5
559 je L$ecb_enc_four
560 movups 64(%rdi),%xmm6
561 cmpq $96,%rdx
562 jb L$ecb_enc_five
563 movups 80(%rdi),%xmm7
564 je L$ecb_enc_six
565 movdqu 96(%rdi),%xmm8
566 call _aesni_encrypt8
567 movups %xmm2,(%rsi)
568 movups %xmm3,16(%rsi)
569 movups %xmm4,32(%rsi)
570 movups %xmm5,48(%rsi)
571 movups %xmm6,64(%rsi)
572 movups %xmm7,80(%rsi)
573 movups %xmm8,96(%rsi)
574 jmp L$ecb_ret
575 .p2align 4
# Single block: inline one-block loop (same shape as _aesni_encrypt).
576 L$ecb_enc_one:
577 movups (%rcx),%xmm0
578 movups 16(%rcx),%xmm1
579 leaq 32(%rcx),%rcx
580 xorps %xmm0,%xmm2
581 L$oop_enc1_3:
582 .byte 102,15,56,220,209
583 decl %eax
584 movups (%rcx),%xmm1
585 leaq 16(%rcx),%rcx
586 jnz L$oop_enc1_3
587 .byte 102,15,56,221,209
588 movups %xmm2,(%rsi)
589 jmp L$ecb_ret
590 .p2align 4
# Two blocks: zero %xmm4 and reuse the 3-block helper.
591 L$ecb_enc_two:
592 xorps %xmm4,%xmm4
593 call _aesni_encrypt3
594 movups %xmm2,(%rsi)
595 movups %xmm3,16(%rsi)
596 jmp L$ecb_ret
597 .p2align 4
598 L$ecb_enc_three:
599 call _aesni_encrypt3
600 movups %xmm2,(%rsi)
601 movups %xmm3,16(%rsi)
602 movups %xmm4,32(%rsi)
603 jmp L$ecb_ret
604 .p2align 4
605 L$ecb_enc_four:
606 call _aesni_encrypt4
607 movups %xmm2,(%rsi)
608 movups %xmm3,16(%rsi)
609 movups %xmm4,32(%rsi)
610 movups %xmm5,48(%rsi)
611 jmp L$ecb_ret
612 .p2align 4
# Five blocks: zero %xmm7 and reuse the 6-block helper.
613 L$ecb_enc_five:
614 xorps %xmm7,%xmm7
615 call _aesni_encrypt6
616 movups %xmm2,(%rsi)
617 movups %xmm3,16(%rsi)
618 movups %xmm4,32(%rsi)
619 movups %xmm5,48(%rsi)
620 movups %xmm6,64(%rsi)
621 jmp L$ecb_ret
622 .p2align 4
623 L$ecb_enc_six:
624 call _aesni_encrypt6
625 movups %xmm2,(%rsi)
626 movups %xmm3,16(%rsi)
627 movups %xmm4,32(%rsi)
628 movups %xmm5,48(%rsi)
629 movups %xmm6,64(%rsi)
630 movups %xmm7,80(%rsi)
631 jmp L$ecb_ret
633 .p2align 4
# ---- decrypt path: identical structure using the decrypt helpers.
634 L$ecb_decrypt:
635 cmpq $128,%rdx
636 jb L$ecb_dec_tail
638 movdqu (%rdi),%xmm2
639 movdqu 16(%rdi),%xmm3
640 movdqu 32(%rdi),%xmm4
641 movdqu 48(%rdi),%xmm5
642 movdqu 64(%rdi),%xmm6
643 movdqu 80(%rdi),%xmm7
644 movdqu 96(%rdi),%xmm8
645 movdqu 112(%rdi),%xmm9
646 leaq 128(%rdi),%rdi
647 subq $128,%rdx
648 jmp L$ecb_dec_loop8_enter
649 .p2align 4
650 L$ecb_dec_loop8:
651 movups %xmm2,(%rsi)
652 movq %r11,%rcx
653 movdqu (%rdi),%xmm2
654 movl %r10d,%eax
655 movups %xmm3,16(%rsi)
656 movdqu 16(%rdi),%xmm3
657 movups %xmm4,32(%rsi)
658 movdqu 32(%rdi),%xmm4
659 movups %xmm5,48(%rsi)
660 movdqu 48(%rdi),%xmm5
661 movups %xmm6,64(%rsi)
662 movdqu 64(%rdi),%xmm6
663 movups %xmm7,80(%rsi)
664 movdqu 80(%rdi),%xmm7
665 movups %xmm8,96(%rsi)
666 movdqu 96(%rdi),%xmm8
667 movups %xmm9,112(%rsi)
668 leaq 128(%rsi),%rsi
669 movdqu 112(%rdi),%xmm9
670 leaq 128(%rdi),%rdi
671 L$ecb_dec_loop8_enter:
673 call _aesni_decrypt8
675 movups (%r11),%xmm0
676 subq $128,%rdx
677 jnc L$ecb_dec_loop8
679 movups %xmm2,(%rsi)
680 movq %r11,%rcx
681 movups %xmm3,16(%rsi)
682 movl %r10d,%eax
683 movups %xmm4,32(%rsi)
684 movups %xmm5,48(%rsi)
685 movups %xmm6,64(%rsi)
686 movups %xmm7,80(%rsi)
687 movups %xmm8,96(%rsi)
688 movups %xmm9,112(%rsi)
689 leaq 128(%rsi),%rsi
690 addq $128,%rdx
691 jz L$ecb_ret
# Decrypt tail dispatch, mirroring the encrypt tail above.
693 L$ecb_dec_tail:
694 movups (%rdi),%xmm2
695 cmpq $32,%rdx
696 jb L$ecb_dec_one
697 movups 16(%rdi),%xmm3
698 je L$ecb_dec_two
699 movups 32(%rdi),%xmm4
700 cmpq $64,%rdx
701 jb L$ecb_dec_three
702 movups 48(%rdi),%xmm5
703 je L$ecb_dec_four
704 movups 64(%rdi),%xmm6
705 cmpq $96,%rdx
706 jb L$ecb_dec_five
707 movups 80(%rdi),%xmm7
708 je L$ecb_dec_six
709 movups 96(%rdi),%xmm8
710 movups (%rcx),%xmm0
711 call _aesni_decrypt8
712 movups %xmm2,(%rsi)
713 movups %xmm3,16(%rsi)
714 movups %xmm4,32(%rsi)
715 movups %xmm5,48(%rsi)
716 movups %xmm6,64(%rsi)
717 movups %xmm7,80(%rsi)
718 movups %xmm8,96(%rsi)
719 jmp L$ecb_ret
720 .p2align 4
721 L$ecb_dec_one:
722 movups (%rcx),%xmm0
723 movups 16(%rcx),%xmm1
724 leaq 32(%rcx),%rcx
725 xorps %xmm0,%xmm2
726 L$oop_dec1_4:
727 .byte 102,15,56,222,209
728 decl %eax
729 movups (%rcx),%xmm1
730 leaq 16(%rcx),%rcx
731 jnz L$oop_dec1_4
732 .byte 102,15,56,223,209
733 movups %xmm2,(%rsi)
734 jmp L$ecb_ret
735 .p2align 4
736 L$ecb_dec_two:
737 xorps %xmm4,%xmm4
738 call _aesni_decrypt3
739 movups %xmm2,(%rsi)
740 movups %xmm3,16(%rsi)
741 jmp L$ecb_ret
742 .p2align 4
743 L$ecb_dec_three:
744 call _aesni_decrypt3
745 movups %xmm2,(%rsi)
746 movups %xmm3,16(%rsi)
747 movups %xmm4,32(%rsi)
748 jmp L$ecb_ret
749 .p2align 4
750 L$ecb_dec_four:
751 call _aesni_decrypt4
752 movups %xmm2,(%rsi)
753 movups %xmm3,16(%rsi)
754 movups %xmm4,32(%rsi)
755 movups %xmm5,48(%rsi)
756 jmp L$ecb_ret
757 .p2align 4
758 L$ecb_dec_five:
759 xorps %xmm7,%xmm7
760 call _aesni_decrypt6
761 movups %xmm2,(%rsi)
762 movups %xmm3,16(%rsi)
763 movups %xmm4,32(%rsi)
764 movups %xmm5,48(%rsi)
765 movups %xmm6,64(%rsi)
766 jmp L$ecb_ret
767 .p2align 4
768 L$ecb_dec_six:
769 call _aesni_decrypt6
770 movups %xmm2,(%rsi)
771 movups %xmm3,16(%rsi)
772 movups %xmm4,32(%rsi)
773 movups %xmm5,48(%rsi)
774 movups %xmm6,64(%rsi)
775 movups %xmm7,80(%rsi)
777 L$ecb_ret:
778 .byte 0xf3,0xc3
# void aesni_ccm64_encrypt_blocks(const void *in, void *out, size_t blocks,
#                                 const AES_KEY *key, const char *ivec,
#                                 char *cmac)
# %rdi=in, %rsi=out, %rdx=block count (decq per 16-byte block),
# %rcx=key, %r8=counter block, %r9=CBC-MAC block (updated in place).
# Per block, the counter (%xmm2, kept byte-swapped via L$bswap_mask and
# stepped with L$increment64) and the running MAC (%xmm3) are encrypted
# TWO-AT-A-TIME in the L$ccm64_enc2_loop; the plaintext is folded into
# the MAC via the key-0 whitening (xmm0 ^= input before xmm3 ^= xmm0),
# and the encrypted counter is xored with the input to produce output.
# 102,68,15,56,0,207 = pshufb %xmm7,%xmm9; 102,15,56,0,215 = pshufb %xmm7,%xmm2.
780 .globl _aesni_ccm64_encrypt_blocks
782 .p2align 4
783 _aesni_ccm64_encrypt_blocks:
784 movl 240(%rcx),%eax
785 movdqu (%r8),%xmm9
786 movdqa L$increment64(%rip),%xmm6
787 movdqa L$bswap_mask(%rip),%xmm7
# Round count halved: the 2-block loop below does two rounds per pass.
789 shrl $1,%eax
790 leaq 0(%rcx),%r11
791 movdqu (%r9),%xmm3
792 movdqa %xmm9,%xmm2
793 movl %eax,%r10d
794 .byte 102,68,15,56,0,207
795 jmp L$ccm64_enc_outer
796 .p2align 4
797 L$ccm64_enc_outer:
798 movups (%r11),%xmm0
799 movl %r10d,%eax
800 movups (%rdi),%xmm8
# Whitening: counter ^= key0; MAC ^= key0 ^ plaintext (MAC absorbs input).
802 xorps %xmm0,%xmm2
803 movups 16(%r11),%xmm1
804 xorps %xmm8,%xmm0
805 leaq 32(%r11),%rcx
806 xorps %xmm0,%xmm3
807 movups (%rcx),%xmm0
809 L$ccm64_enc2_loop:
810 .byte 102,15,56,220,209
811 decl %eax
812 .byte 102,15,56,220,217
813 movups 16(%rcx),%xmm1
814 .byte 102,15,56,220,208
815 leaq 32(%rcx),%rcx
816 .byte 102,15,56,220,216
817 movups 0(%rcx),%xmm0
818 jnz L$ccm64_enc2_loop
819 .byte 102,15,56,220,209
820 .byte 102,15,56,220,217
821 paddq %xmm6,%xmm9
822 .byte 102,15,56,221,208
823 .byte 102,15,56,221,216
# Emit ciphertext = plaintext ^ E(counter); advance pointers and counter.
825 decq %rdx
826 leaq 16(%rdi),%rdi
827 xorps %xmm2,%xmm8
828 movdqa %xmm9,%xmm2
829 movups %xmm8,(%rsi)
830 leaq 16(%rsi),%rsi
831 .byte 102,15,56,0,215
832 jnz L$ccm64_enc_outer
# Store the final MAC back to the caller's cmac buffer.
834 movups %xmm3,(%r9)
835 .byte 0xf3,0xc3
# void aesni_ccm64_decrypt_blocks(const void *in, void *out, size_t blocks,
#                                 const AES_KEY *key, const char *ivec,
#                                 char *cmac)
# %rdi=in, %rsi=out, %rdx=block count, %rcx=key, %r8=counter block,
# %r9=CBC-MAC block (updated in place).
# The first counter is encrypted up front (L$oop_enc1_5); each outer
# iteration then recovers plaintext = ciphertext ^ E(counter), and the
# 2-block inner loop encrypts the NEXT counter together with the MAC
# update (MAC absorbs the recovered plaintext via the key-0 whitening
# trick).  The last MAC block is finished separately at L$ccm64_dec_break.
837 .globl _aesni_ccm64_decrypt_blocks
839 .p2align 4
840 _aesni_ccm64_decrypt_blocks:
841 movl 240(%rcx),%eax
842 movups (%r8),%xmm9
843 movdqu (%r9),%xmm3
844 movdqa L$increment64(%rip),%xmm6
845 movdqa L$bswap_mask(%rip),%xmm7
847 movaps %xmm9,%xmm2
848 movl %eax,%r10d
849 movq %rcx,%r11
850 .byte 102,68,15,56,0,207
# Encrypt the initial counter block (one round per iteration).
851 movups (%rcx),%xmm0
852 movups 16(%rcx),%xmm1
853 leaq 32(%rcx),%rcx
854 xorps %xmm0,%xmm2
855 L$oop_enc1_5:
856 .byte 102,15,56,220,209
857 decl %eax
858 movups (%rcx),%xmm1
859 leaq 16(%rcx),%rcx
860 jnz L$oop_enc1_5
861 .byte 102,15,56,221,209
862 movups (%rdi),%xmm8
863 paddq %xmm6,%xmm9
864 leaq 16(%rdi),%rdi
865 jmp L$ccm64_dec_outer
866 .p2align 4
867 L$ccm64_dec_outer:
# plaintext = ciphertext ^ E(counter); write it out, refresh counter.
868 xorps %xmm2,%xmm8
869 movdqa %xmm9,%xmm2
870 movl %r10d,%eax
871 movups %xmm8,(%rsi)
872 leaq 16(%rsi),%rsi
873 .byte 102,15,56,0,215
875 subq $1,%rdx
876 jz L$ccm64_dec_break
# Two-at-a-time: encrypt next counter (%xmm2) and MAC (%xmm3) together;
# MAC is whitened with key0 ^ plaintext so it absorbs the plaintext.
878 movups (%r11),%xmm0
879 shrl $1,%eax
880 movups 16(%r11),%xmm1
881 xorps %xmm0,%xmm8
882 leaq 32(%r11),%rcx
883 xorps %xmm0,%xmm2
884 xorps %xmm8,%xmm3
885 movups (%rcx),%xmm0
887 L$ccm64_dec2_loop:
888 .byte 102,15,56,220,209
889 decl %eax
890 .byte 102,15,56,220,217
891 movups 16(%rcx),%xmm1
892 .byte 102,15,56,220,208
893 leaq 32(%rcx),%rcx
894 .byte 102,15,56,220,216
895 movups 0(%rcx),%xmm0
896 jnz L$ccm64_dec2_loop
897 movups (%rdi),%xmm8
898 paddq %xmm6,%xmm9
899 .byte 102,15,56,220,209
900 .byte 102,15,56,220,217
901 leaq 16(%rdi),%rdi
902 .byte 102,15,56,221,208
903 .byte 102,15,56,221,216
904 jmp L$ccm64_dec_outer
906 .p2align 4
# Last block: fold the final plaintext into the MAC and encrypt the MAC
# alone (one round per iteration), then store it to (%r9).
907 L$ccm64_dec_break:
909 movups (%r11),%xmm0
910 movups 16(%r11),%xmm1
911 xorps %xmm0,%xmm8
912 leaq 32(%r11),%r11
913 xorps %xmm8,%xmm3
914 L$oop_enc1_6:
915 .byte 102,15,56,220,217
916 decl %eax
917 movups (%r11),%xmm1
918 leaq 16(%r11),%r11
919 jnz L$oop_enc1_6
920 .byte 102,15,56,221,217
921 movups %xmm3,(%r9)
922 .byte 0xf3,0xc3
# void aesni_ctr32_encrypt_blocks(const void *in, void *out, size_t blocks,
#                                 const AES_KEY *key, const char *ivec)
# %rdi=in, %rsi=out, %rdx=block count, %rcx=key, %r8=16-byte ivec whose
# last 4 bytes are the big-endian 32-bit counter.
# Six counter blocks are generated per iteration: the counter dwords are
# assembled with pextrd/pinsrd (102,69,15,58,22 / 102,68|69,15,58,34),
# kept in %xmm12/%xmm13 spilled to the red zone at -40(%rsp)/-24(%rsp),
# byte-swapped via L$bswap_mask, and advanced with L$increment32.
# Keystream blocks are then xored with the input.  Single-block inputs
# take L$ctr32_one_shortcut; 1..5 leftover blocks go through L$ctr32_tail.
# NOTE(review): red-zone use means this is not signal/kernel safe —
# standard for this generator's userland output.
924 .globl _aesni_ctr32_encrypt_blocks
926 .p2align 4
927 _aesni_ctr32_encrypt_blocks:
928 cmpq $1,%rdx
929 je L$ctr32_one_shortcut
# Load IV, extract the counter into %r10d (pextrd $3) and rebuild the
# counter vectors; bswapl converts to host order for arithmetic.
931 movdqu (%r8),%xmm14
932 movdqa L$bswap_mask(%rip),%xmm15
933 xorl %eax,%eax
934 .byte 102,69,15,58,22,242,3
935 .byte 102,68,15,58,34,240,3
937 movl 240(%rcx),%eax
938 bswapl %r10d
939 pxor %xmm12,%xmm12
940 pxor %xmm13,%xmm13
941 .byte 102,69,15,58,34,226,0
942 leaq 3(%r10),%r11
943 .byte 102,69,15,58,34,235,0
944 incl %r10d
945 .byte 102,69,15,58,34,226,1
946 incq %r11
947 .byte 102,69,15,58,34,235,1
948 incl %r10d
949 .byte 102,69,15,58,34,226,2
950 incq %r11
951 .byte 102,69,15,58,34,235,2
# Spill counter vectors to the red zone; pshufb back to big-endian.
952 movdqa %xmm12,-40(%rsp)
953 .byte 102,69,15,56,0,231
954 movdqa %xmm13,-24(%rsp)
955 .byte 102,69,15,56,0,239
957 pshufd $192,%xmm12,%xmm2
958 pshufd $128,%xmm12,%xmm3
959 pshufd $64,%xmm12,%xmm4
960 cmpq $6,%rdx
961 jb L$ctr32_tail
# Main path: cache key/rounds in %r11/%r10d (rounds halved for the
# two-rounds-per-iteration inner loop).
962 shrl $1,%eax
963 movq %rcx,%r11
964 movl %eax,%r10d
965 subq $6,%rdx
966 jmp L$ctr32_loop6
968 .p2align 4
969 L$ctr32_loop6:
# Merge counter words with the IV prefix (por %xmm14) to form the six
# input blocks, whitening with key 0 as they are assembled.
970 pshufd $192,%xmm13,%xmm5
971 por %xmm14,%xmm2
972 movups (%r11),%xmm0
973 pshufd $128,%xmm13,%xmm6
974 por %xmm14,%xmm3
975 movups 16(%r11),%xmm1
976 pshufd $64,%xmm13,%xmm7
977 por %xmm14,%xmm4
978 por %xmm14,%xmm5
979 xorps %xmm0,%xmm2
980 por %xmm14,%xmm6
981 por %xmm14,%xmm7
986 pxor %xmm0,%xmm3
987 .byte 102,15,56,220,209
988 leaq 32(%r11),%rcx
989 pxor %xmm0,%xmm4
990 .byte 102,15,56,220,217
991 movdqa L$increment32(%rip),%xmm13
992 pxor %xmm0,%xmm5
993 .byte 102,15,56,220,225
994 movdqa -40(%rsp),%xmm12
995 pxor %xmm0,%xmm6
996 .byte 102,15,56,220,233
997 pxor %xmm0,%xmm7
998 movups (%rcx),%xmm0
999 decl %eax
1000 .byte 102,15,56,220,241
1001 .byte 102,15,56,220,249
1002 jmp L$ctr32_enc_loop6_enter
1003 .p2align 4
1004 L$ctr32_enc_loop6:
1005 .byte 102,15,56,220,209
1006 .byte 102,15,56,220,217
1007 decl %eax
1008 .byte 102,15,56,220,225
1009 .byte 102,15,56,220,233
1010 .byte 102,15,56,220,241
1011 .byte 102,15,56,220,249
1012 L$ctr32_enc_loop6_enter:
1013 movups 16(%rcx),%xmm1
1014 .byte 102,15,56,220,208
1015 .byte 102,15,56,220,216
1016 leaq 32(%rcx),%rcx
1017 .byte 102,15,56,220,224
1018 .byte 102,15,56,220,232
1019 .byte 102,15,56,220,240
1020 .byte 102,15,56,220,248
1021 movups (%rcx),%xmm0
1022 jnz L$ctr32_enc_loop6
# Final rounds interleaved with advancing the counters for next pass.
1024 .byte 102,15,56,220,209
1025 paddd %xmm13,%xmm12
1026 .byte 102,15,56,220,217
1027 paddd -24(%rsp),%xmm13
1028 .byte 102,15,56,220,225
1029 movdqa %xmm12,-40(%rsp)
1030 .byte 102,15,56,220,233
1031 movdqa %xmm13,-24(%rsp)
1032 .byte 102,15,56,220,241
1033 .byte 102,69,15,56,0,231
1034 .byte 102,15,56,220,249
1035 .byte 102,69,15,56,0,239
# aesenclast interleaved with loading the six plaintext blocks.
1037 .byte 102,15,56,221,208
1038 movups (%rdi),%xmm8
1039 .byte 102,15,56,221,216
1040 movups 16(%rdi),%xmm9
1041 .byte 102,15,56,221,224
1042 movups 32(%rdi),%xmm10
1043 .byte 102,15,56,221,232
1044 movups 48(%rdi),%xmm11
1045 .byte 102,15,56,221,240
1046 movups 64(%rdi),%xmm1
1047 .byte 102,15,56,221,248
1048 movups 80(%rdi),%xmm0
1049 leaq 96(%rdi),%rdi
# output = plaintext ^ keystream; prepare next counter triple meanwhile.
1051 xorps %xmm2,%xmm8
1052 pshufd $192,%xmm12,%xmm2
1053 xorps %xmm3,%xmm9
1054 pshufd $128,%xmm12,%xmm3
1055 movups %xmm8,(%rsi)
1056 xorps %xmm4,%xmm10
1057 pshufd $64,%xmm12,%xmm4
1058 movups %xmm9,16(%rsi)
1059 xorps %xmm5,%xmm11
1060 movups %xmm10,32(%rsi)
1061 xorps %xmm6,%xmm1
1062 movups %xmm11,48(%rsi)
1063 xorps %xmm7,%xmm0
1064 movups %xmm1,64(%rsi)
1065 movups %xmm0,80(%rsi)
1066 leaq 96(%rsi),%rsi
1067 movl %r10d,%eax
1068 subq $6,%rdx
1069 jnc L$ctr32_loop6
# Restore full round count (it was halved and decremented): 2*eax+1.
1071 addq $6,%rdx
1072 jz L$ctr32_done
1073 movq %r11,%rcx
1074 leal 1(%rax,%rax,1),%eax
# Tail: 1..5 remaining blocks dispatched by count.
1076 L$ctr32_tail:
1077 por %xmm14,%xmm2
1078 movups (%rdi),%xmm8
1079 cmpq $2,%rdx
1080 jb L$ctr32_one
1082 por %xmm14,%xmm3
1083 movups 16(%rdi),%xmm9
1084 je L$ctr32_two
1086 pshufd $192,%xmm13,%xmm5
1087 por %xmm14,%xmm4
1088 movups 32(%rdi),%xmm10
1089 cmpq $4,%rdx
1090 jb L$ctr32_three
1092 pshufd $128,%xmm13,%xmm6
1093 por %xmm14,%xmm5
1094 movups 48(%rdi),%xmm11
1095 je L$ctr32_four
1097 por %xmm14,%xmm6
1098 xorps %xmm7,%xmm7
1100 call _aesni_encrypt6
1102 movups 64(%rdi),%xmm1
1103 xorps %xmm2,%xmm8
1104 xorps %xmm3,%xmm9
1105 movups %xmm8,(%rsi)
1106 xorps %xmm4,%xmm10
1107 movups %xmm9,16(%rsi)
1108 xorps %xmm5,%xmm11
1109 movups %xmm10,32(%rsi)
1110 xorps %xmm6,%xmm1
1111 movups %xmm11,48(%rsi)
1112 movups %xmm1,64(%rsi)
1113 jmp L$ctr32_done
1115 .p2align 4
# blocks == 1: encrypt the IV directly, no counter bookkeeping needed.
1116 L$ctr32_one_shortcut:
1117 movups (%r8),%xmm2
1118 movups (%rdi),%xmm8
1119 movl 240(%rcx),%eax
1120 L$ctr32_one:
1121 movups (%rcx),%xmm0
1122 movups 16(%rcx),%xmm1
1123 leaq 32(%rcx),%rcx
1124 xorps %xmm0,%xmm2
1125 L$oop_enc1_7:
1126 .byte 102,15,56,220,209
1127 decl %eax
1128 movups (%rcx),%xmm1
1129 leaq 16(%rcx),%rcx
1130 jnz L$oop_enc1_7
1131 .byte 102,15,56,221,209
1132 xorps %xmm2,%xmm8
1133 movups %xmm8,(%rsi)
1134 jmp L$ctr32_done
1136 .p2align 4
1137 L$ctr32_two:
1138 xorps %xmm4,%xmm4
1139 call _aesni_encrypt3
1140 xorps %xmm2,%xmm8
1141 xorps %xmm3,%xmm9
1142 movups %xmm8,(%rsi)
1143 movups %xmm9,16(%rsi)
1144 jmp L$ctr32_done
1146 .p2align 4
1147 L$ctr32_three:
1148 call _aesni_encrypt3
1149 xorps %xmm2,%xmm8
1150 xorps %xmm3,%xmm9
1151 movups %xmm8,(%rsi)
1152 xorps %xmm4,%xmm10
1153 movups %xmm9,16(%rsi)
1154 movups %xmm10,32(%rsi)
1155 jmp L$ctr32_done
1157 .p2align 4
1158 L$ctr32_four:
1159 call _aesni_encrypt4
1160 xorps %xmm2,%xmm8
1161 xorps %xmm3,%xmm9
1162 movups %xmm8,(%rsi)
1163 xorps %xmm4,%xmm10
1164 movups %xmm9,16(%rsi)
1165 xorps %xmm5,%xmm11
1166 movups %xmm10,32(%rsi)
1167 movups %xmm11,48(%rsi)
1169 L$ctr32_done:
1170 .byte 0xf3,0xc3
1172 .globl _aesni_xts_encrypt
1174 .p2align 4
1175 _aesni_xts_encrypt:
1176 leaq -104(%rsp),%rsp
1177 movups (%r9),%xmm15
1178 movl 240(%r8),%eax
1179 movl 240(%rcx),%r10d
1180 movups (%r8),%xmm0
1181 movups 16(%r8),%xmm1
1182 leaq 32(%r8),%r8
1183 xorps %xmm0,%xmm15
1184 L$oop_enc1_8:
1185 .byte 102,68,15,56,220,249
1186 decl %eax
1187 movups (%r8),%xmm1
1188 leaq 16(%r8),%r8
1189 jnz L$oop_enc1_8
1190 .byte 102,68,15,56,221,249
1191 movq %rcx,%r11
1192 movl %r10d,%eax
1193 movq %rdx,%r9
1194 andq $-16,%rdx
1196 movdqa L$xts_magic(%rip),%xmm8
1197 pxor %xmm14,%xmm14
1198 pcmpgtd %xmm15,%xmm14
1199 pshufd $19,%xmm14,%xmm9
1200 pxor %xmm14,%xmm14
1201 movdqa %xmm15,%xmm10
1202 paddq %xmm15,%xmm15
1203 pand %xmm8,%xmm9
1204 pcmpgtd %xmm15,%xmm14
1205 pxor %xmm9,%xmm15
1206 pshufd $19,%xmm14,%xmm9
1207 pxor %xmm14,%xmm14
1208 movdqa %xmm15,%xmm11
1209 paddq %xmm15,%xmm15
1210 pand %xmm8,%xmm9
1211 pcmpgtd %xmm15,%xmm14
1212 pxor %xmm9,%xmm15
1213 pshufd $19,%xmm14,%xmm9
1214 pxor %xmm14,%xmm14
1215 movdqa %xmm15,%xmm12
1216 paddq %xmm15,%xmm15
1217 pand %xmm8,%xmm9
1218 pcmpgtd %xmm15,%xmm14
1219 pxor %xmm9,%xmm15
1220 pshufd $19,%xmm14,%xmm9
1221 pxor %xmm14,%xmm14
1222 movdqa %xmm15,%xmm13
1223 paddq %xmm15,%xmm15
1224 pand %xmm8,%xmm9
1225 pcmpgtd %xmm15,%xmm14
1226 pxor %xmm9,%xmm15
1227 subq $96,%rdx
1228 jc L$xts_enc_short
1230 shrl $1,%eax
1231 subl $1,%eax
1232 movl %eax,%r10d
1233 jmp L$xts_enc_grandloop
1235 .p2align 4
1236 L$xts_enc_grandloop:
1237 pshufd $19,%xmm14,%xmm9
1238 movdqa %xmm15,%xmm14
1239 paddq %xmm15,%xmm15
1240 movdqu 0(%rdi),%xmm2
1241 pand %xmm8,%xmm9
1242 movdqu 16(%rdi),%xmm3
1243 pxor %xmm9,%xmm15
1245 movdqu 32(%rdi),%xmm4
1246 pxor %xmm10,%xmm2
1247 movdqu 48(%rdi),%xmm5
1248 pxor %xmm11,%xmm3
1249 movdqu 64(%rdi),%xmm6
1250 pxor %xmm12,%xmm4
1251 movdqu 80(%rdi),%xmm7
1252 leaq 96(%rdi),%rdi
1253 pxor %xmm13,%xmm5
1254 movups (%r11),%xmm0
1255 pxor %xmm14,%xmm6
1256 pxor %xmm15,%xmm7
1260 movups 16(%r11),%xmm1
1261 pxor %xmm0,%xmm2
1262 pxor %xmm0,%xmm3
1263 movdqa %xmm10,0(%rsp)
1264 .byte 102,15,56,220,209
1265 leaq 32(%r11),%rcx
1266 pxor %xmm0,%xmm4
1267 movdqa %xmm11,16(%rsp)
1268 .byte 102,15,56,220,217
1269 pxor %xmm0,%xmm5
1270 movdqa %xmm12,32(%rsp)
1271 .byte 102,15,56,220,225
1272 pxor %xmm0,%xmm6
1273 movdqa %xmm13,48(%rsp)
1274 .byte 102,15,56,220,233
1275 pxor %xmm0,%xmm7
1276 movups (%rcx),%xmm0
1277 decl %eax
1278 movdqa %xmm14,64(%rsp)
1279 .byte 102,15,56,220,241
1280 movdqa %xmm15,80(%rsp)
1281 .byte 102,15,56,220,249
1282 pxor %xmm14,%xmm14
1283 pcmpgtd %xmm15,%xmm14
1284 jmp L$xts_enc_loop6_enter
1286 .p2align 4
1287 L$xts_enc_loop6:
1288 .byte 102,15,56,220,209
1289 .byte 102,15,56,220,217
1290 decl %eax
1291 .byte 102,15,56,220,225
1292 .byte 102,15,56,220,233
1293 .byte 102,15,56,220,241
1294 .byte 102,15,56,220,249
1295 L$xts_enc_loop6_enter:
1296 movups 16(%rcx),%xmm1
1297 .byte 102,15,56,220,208
1298 .byte 102,15,56,220,216
1299 leaq 32(%rcx),%rcx
1300 .byte 102,15,56,220,224
1301 .byte 102,15,56,220,232
1302 .byte 102,15,56,220,240
1303 .byte 102,15,56,220,248
1304 movups (%rcx),%xmm0
1305 jnz L$xts_enc_loop6
1307 pshufd $19,%xmm14,%xmm9
1308 pxor %xmm14,%xmm14
1309 paddq %xmm15,%xmm15
1310 .byte 102,15,56,220,209
1311 pand %xmm8,%xmm9
1312 .byte 102,15,56,220,217
1313 pcmpgtd %xmm15,%xmm14
1314 .byte 102,15,56,220,225
1315 pxor %xmm9,%xmm15
1316 .byte 102,15,56,220,233
1317 .byte 102,15,56,220,241
1318 .byte 102,15,56,220,249
1319 movups 16(%rcx),%xmm1
1321 pshufd $19,%xmm14,%xmm9
1322 pxor %xmm14,%xmm14
1323 movdqa %xmm15,%xmm10
1324 paddq %xmm15,%xmm15
1325 .byte 102,15,56,220,208
1326 pand %xmm8,%xmm9
1327 .byte 102,15,56,220,216
1328 pcmpgtd %xmm15,%xmm14
1329 .byte 102,15,56,220,224
1330 pxor %xmm9,%xmm15
1331 .byte 102,15,56,220,232
1332 .byte 102,15,56,220,240
1333 .byte 102,15,56,220,248
1334 movups 32(%rcx),%xmm0
1336 pshufd $19,%xmm14,%xmm9
1337 pxor %xmm14,%xmm14
1338 movdqa %xmm15,%xmm11
1339 paddq %xmm15,%xmm15
1340 .byte 102,15,56,220,209
1341 pand %xmm8,%xmm9
1342 .byte 102,15,56,220,217
1343 pcmpgtd %xmm15,%xmm14
1344 .byte 102,15,56,220,225
1345 pxor %xmm9,%xmm15
1346 .byte 102,15,56,220,233
1347 .byte 102,15,56,220,241
1348 .byte 102,15,56,220,249
1350 pshufd $19,%xmm14,%xmm9
1351 pxor %xmm14,%xmm14
1352 movdqa %xmm15,%xmm12
1353 paddq %xmm15,%xmm15
1354 .byte 102,15,56,221,208
1355 pand %xmm8,%xmm9
1356 .byte 102,15,56,221,216
1357 pcmpgtd %xmm15,%xmm14
1358 .byte 102,15,56,221,224
1359 pxor %xmm9,%xmm15
1360 .byte 102,15,56,221,232
1361 .byte 102,15,56,221,240
1362 .byte 102,15,56,221,248
1364 pshufd $19,%xmm14,%xmm9
1365 pxor %xmm14,%xmm14
1366 movdqa %xmm15,%xmm13
1367 paddq %xmm15,%xmm15
1368 xorps 0(%rsp),%xmm2
1369 pand %xmm8,%xmm9
1370 xorps 16(%rsp),%xmm3
1371 pcmpgtd %xmm15,%xmm14
1372 pxor %xmm9,%xmm15
1374 xorps 32(%rsp),%xmm4
1375 movups %xmm2,0(%rsi)
1376 xorps 48(%rsp),%xmm5
1377 movups %xmm3,16(%rsi)
1378 xorps 64(%rsp),%xmm6
1379 movups %xmm4,32(%rsi)
1380 xorps 80(%rsp),%xmm7
1381 movups %xmm5,48(%rsi)
1382 movl %r10d,%eax
1383 movups %xmm6,64(%rsi)
1384 movups %xmm7,80(%rsi)
1385 leaq 96(%rsi),%rsi
1386 subq $96,%rdx
1387 jnc L$xts_enc_grandloop
1389 leal 3(%rax,%rax,1),%eax
1390 movq %r11,%rcx
1391 movl %eax,%r10d
1393 L$xts_enc_short:
1394 addq $96,%rdx
1395 jz L$xts_enc_done
1397 cmpq $32,%rdx
1398 jb L$xts_enc_one
1399 je L$xts_enc_two
1401 cmpq $64,%rdx
1402 jb L$xts_enc_three
1403 je L$xts_enc_four
1405 pshufd $19,%xmm14,%xmm9
1406 movdqa %xmm15,%xmm14
1407 paddq %xmm15,%xmm15
1408 movdqu (%rdi),%xmm2
1409 pand %xmm8,%xmm9
1410 movdqu 16(%rdi),%xmm3
1411 pxor %xmm9,%xmm15
1413 movdqu 32(%rdi),%xmm4
1414 pxor %xmm10,%xmm2
1415 movdqu 48(%rdi),%xmm5
1416 pxor %xmm11,%xmm3
1417 movdqu 64(%rdi),%xmm6
1418 leaq 80(%rdi),%rdi
1419 pxor %xmm12,%xmm4
1420 pxor %xmm13,%xmm5
1421 pxor %xmm14,%xmm6
1423 call _aesni_encrypt6
1425 xorps %xmm10,%xmm2
1426 movdqa %xmm15,%xmm10
1427 xorps %xmm11,%xmm3
1428 xorps %xmm12,%xmm4
1429 movdqu %xmm2,(%rsi)
1430 xorps %xmm13,%xmm5
1431 movdqu %xmm3,16(%rsi)
1432 xorps %xmm14,%xmm6
1433 movdqu %xmm4,32(%rsi)
1434 movdqu %xmm5,48(%rsi)
1435 movdqu %xmm6,64(%rsi)
1436 leaq 80(%rsi),%rsi
1437 jmp L$xts_enc_done
1439 .p2align 4
1440 L$xts_enc_one:
1441 movups (%rdi),%xmm2
1442 leaq 16(%rdi),%rdi
1443 xorps %xmm10,%xmm2
1444 movups (%rcx),%xmm0
1445 movups 16(%rcx),%xmm1
1446 leaq 32(%rcx),%rcx
1447 xorps %xmm0,%xmm2
1448 L$oop_enc1_9:
1449 .byte 102,15,56,220,209
1450 decl %eax
1451 movups (%rcx),%xmm1
1452 leaq 16(%rcx),%rcx
1453 jnz L$oop_enc1_9
1454 .byte 102,15,56,221,209
1455 xorps %xmm10,%xmm2
1456 movdqa %xmm11,%xmm10
1457 movups %xmm2,(%rsi)
1458 leaq 16(%rsi),%rsi
1459 jmp L$xts_enc_done
1461 .p2align 4
1462 L$xts_enc_two:
1463 movups (%rdi),%xmm2
1464 movups 16(%rdi),%xmm3
1465 leaq 32(%rdi),%rdi
1466 xorps %xmm10,%xmm2
1467 xorps %xmm11,%xmm3
1469 call _aesni_encrypt3
1471 xorps %xmm10,%xmm2
1472 movdqa %xmm12,%xmm10
1473 xorps %xmm11,%xmm3
1474 movups %xmm2,(%rsi)
1475 movups %xmm3,16(%rsi)
1476 leaq 32(%rsi),%rsi
1477 jmp L$xts_enc_done
1479 .p2align 4
1480 L$xts_enc_three:
1481 movups (%rdi),%xmm2
1482 movups 16(%rdi),%xmm3
1483 movups 32(%rdi),%xmm4
1484 leaq 48(%rdi),%rdi
1485 xorps %xmm10,%xmm2
1486 xorps %xmm11,%xmm3
1487 xorps %xmm12,%xmm4
1489 call _aesni_encrypt3
1491 xorps %xmm10,%xmm2
1492 movdqa %xmm13,%xmm10
1493 xorps %xmm11,%xmm3
1494 xorps %xmm12,%xmm4
1495 movups %xmm2,(%rsi)
1496 movups %xmm3,16(%rsi)
1497 movups %xmm4,32(%rsi)
1498 leaq 48(%rsi),%rsi
1499 jmp L$xts_enc_done
1501 .p2align 4
1502 L$xts_enc_four:
1503 movups (%rdi),%xmm2
1504 movups 16(%rdi),%xmm3
1505 movups 32(%rdi),%xmm4
1506 xorps %xmm10,%xmm2
1507 movups 48(%rdi),%xmm5
1508 leaq 64(%rdi),%rdi
1509 xorps %xmm11,%xmm3
1510 xorps %xmm12,%xmm4
1511 xorps %xmm13,%xmm5
1513 call _aesni_encrypt4
1515 xorps %xmm10,%xmm2
1516 movdqa %xmm15,%xmm10
1517 xorps %xmm11,%xmm3
1518 xorps %xmm12,%xmm4
1519 movups %xmm2,(%rsi)
1520 xorps %xmm13,%xmm5
1521 movups %xmm3,16(%rsi)
1522 movups %xmm4,32(%rsi)
1523 movups %xmm5,48(%rsi)
1524 leaq 64(%rsi),%rsi
1525 jmp L$xts_enc_done
1527 .p2align 4
1528 L$xts_enc_done:
1529 andq $15,%r9
1530 jz L$xts_enc_ret
1531 movq %r9,%rdx
1533 L$xts_enc_steal:
1534 movzbl (%rdi),%eax
1535 movzbl -16(%rsi),%ecx
1536 leaq 1(%rdi),%rdi
1537 movb %al,-16(%rsi)
1538 movb %cl,0(%rsi)
1539 leaq 1(%rsi),%rsi
1540 subq $1,%rdx
1541 jnz L$xts_enc_steal
1543 subq %r9,%rsi
1544 movq %r11,%rcx
1545 movl %r10d,%eax
1547 movups -16(%rsi),%xmm2
1548 xorps %xmm10,%xmm2
1549 movups (%rcx),%xmm0
1550 movups 16(%rcx),%xmm1
1551 leaq 32(%rcx),%rcx
1552 xorps %xmm0,%xmm2
1553 L$oop_enc1_10:
1554 .byte 102,15,56,220,209
1555 decl %eax
1556 movups (%rcx),%xmm1
1557 leaq 16(%rcx),%rcx
1558 jnz L$oop_enc1_10
1559 .byte 102,15,56,221,209
1560 xorps %xmm10,%xmm2
1561 movups %xmm2,-16(%rsi)
1563 L$xts_enc_ret:
1564 leaq 104(%rsp),%rsp
1565 L$xts_enc_epilogue:
1566 .byte 0xf3,0xc3
# void aesni_xts_decrypt(in=%rdi, out=%rsi, len=%rdx,
#                        key1=%rcx (data key schedule), key2=%r8 (tweak key
#                        schedule), iv=%r9 -> 16-byte tweak seed)
# AES-XTS decryption, SysV AMD64, AT&T syntax.
# NOTE(review): argument roles inferred from register usage below (key2/%r8 is
# consumed only to encrypt the block at (%r9); %rcx rounds are reloaded per
# block) — confirm against the generating perlasm script.
# The raw ".byte 102,15,56,222,RM" sequences encode AESDEC, "...,223,RM"
# AESDECLAST, and "102,68,15,56,220/221,249" AESENC/AESENCLAST %xmm1,%xmm15;
# they are emitted as bytes for compatibility with old assemblers.
1568 .globl _aesni_xts_decrypt
1570 .p2align 4
1571 _aesni_xts_decrypt:
# 104-byte frame: 6 x 16 bytes of tweak spill slots used by the grand loop.
1572 leaq -104(%rsp),%rsp
1573 movups (%r9),%xmm15
1574 movl 240(%r8),%eax
1575 movl 240(%rcx),%r10d
1576 movups (%r8),%xmm0
1577 movups 16(%r8),%xmm1
1578 leaq 32(%r8),%r8
1579 xorps %xmm0,%xmm15
# Encrypt the IV with key2 to produce the initial tweak in %xmm15
# (single-block AESENC loop; %eax counts remaining rounds).
1580 L$oop_enc1_11:
1581 .byte 102,68,15,56,220,249
1582 decl %eax
1583 movups (%r8),%xmm1
1584 leaq 16(%r8),%r8
1585 jnz L$oop_enc1_11
1586 .byte 102,68,15,56,221,249
# If len is not a multiple of 16, hold back one extra full block for
# ciphertext stealing (rax = 16 iff len%16 != 0).
1587 xorl %eax,%eax
1588 testq $15,%rdx
1589 setnz %al
1590 shlq $4,%rax
1591 subq %rax,%rdx
# r11 = saved key1, r9 = original (adjusted) length, rdx = whole-block bytes.
1593 movq %rcx,%r11
1594 movl %r10d,%eax
1595 movq %rdx,%r9
1596 andq $-16,%rdx
# Precompute tweaks for blocks 0..3 into xmm10..xmm13 by repeated GF(2^128)
# doubling: pcmpgtd/pshufd/pand L$xts_magic implements the x^128+x^7+x^2+x+1
# reduction of a left shift (paddq of both 64-bit halves).
1598 movdqa L$xts_magic(%rip),%xmm8
1599 pxor %xmm14,%xmm14
1600 pcmpgtd %xmm15,%xmm14
1601 pshufd $19,%xmm14,%xmm9
1602 pxor %xmm14,%xmm14
1603 movdqa %xmm15,%xmm10
1604 paddq %xmm15,%xmm15
1605 pand %xmm8,%xmm9
1606 pcmpgtd %xmm15,%xmm14
1607 pxor %xmm9,%xmm15
1608 pshufd $19,%xmm14,%xmm9
1609 pxor %xmm14,%xmm14
1610 movdqa %xmm15,%xmm11
1611 paddq %xmm15,%xmm15
1612 pand %xmm8,%xmm9
1613 pcmpgtd %xmm15,%xmm14
1614 pxor %xmm9,%xmm15
1615 pshufd $19,%xmm14,%xmm9
1616 pxor %xmm14,%xmm14
1617 movdqa %xmm15,%xmm12
1618 paddq %xmm15,%xmm15
1619 pand %xmm8,%xmm9
1620 pcmpgtd %xmm15,%xmm14
1621 pxor %xmm9,%xmm15
1622 pshufd $19,%xmm14,%xmm9
1623 pxor %xmm14,%xmm14
1624 movdqa %xmm15,%xmm13
1625 paddq %xmm15,%xmm15
1626 pand %xmm8,%xmm9
1627 pcmpgtd %xmm15,%xmm14
1628 pxor %xmm9,%xmm15
1629 subq $96,%rdx
1630 jc L$xts_dec_short
# r10d = rounds*2 - 1: loop counter for the interleaved 6-block round loop.
1632 shrl $1,%eax
1633 subl $1,%eax
1634 movl %eax,%r10d
1635 jmp L$xts_dec_grandloop
1637 .p2align 4
# Main loop: 6 blocks per iteration; tweak arithmetic for the NEXT iteration
# is interleaved with the AESDEC rounds to hide latency. Current tweaks are
# spilled to 0..80(%rsp) so they can be re-XORed after decryption.
1638 L$xts_dec_grandloop:
1639 pshufd $19,%xmm14,%xmm9
1640 movdqa %xmm15,%xmm14
1641 paddq %xmm15,%xmm15
1642 movdqu 0(%rdi),%xmm2
1643 pand %xmm8,%xmm9
1644 movdqu 16(%rdi),%xmm3
1645 pxor %xmm9,%xmm15
1647 movdqu 32(%rdi),%xmm4
1648 pxor %xmm10,%xmm2
1649 movdqu 48(%rdi),%xmm5
1650 pxor %xmm11,%xmm3
1651 movdqu 64(%rdi),%xmm6
1652 pxor %xmm12,%xmm4
1653 movdqu 80(%rdi),%xmm7
1654 leaq 96(%rdi),%rdi
1655 pxor %xmm13,%xmm5
1656 movups (%r11),%xmm0
1657 pxor %xmm14,%xmm6
1658 pxor %xmm15,%xmm7
1662 movups 16(%r11),%xmm1
1663 pxor %xmm0,%xmm2
1664 pxor %xmm0,%xmm3
1665 movdqa %xmm10,0(%rsp)
1666 .byte 102,15,56,222,209
1667 leaq 32(%r11),%rcx
1668 pxor %xmm0,%xmm4
1669 movdqa %xmm11,16(%rsp)
1670 .byte 102,15,56,222,217
1671 pxor %xmm0,%xmm5
1672 movdqa %xmm12,32(%rsp)
1673 .byte 102,15,56,222,225
1674 pxor %xmm0,%xmm6
1675 movdqa %xmm13,48(%rsp)
1676 .byte 102,15,56,222,233
1677 pxor %xmm0,%xmm7
1678 movups (%rcx),%xmm0
1679 decl %eax
1680 movdqa %xmm14,64(%rsp)
1681 .byte 102,15,56,222,241
1682 movdqa %xmm15,80(%rsp)
1683 .byte 102,15,56,222,249
1684 pxor %xmm14,%xmm14
1685 pcmpgtd %xmm15,%xmm14
1686 jmp L$xts_dec_loop6_enter
1688 .p2align 4
# 6-way AESDEC round loop, two rounds per iteration (xmm1 then xmm0 key).
1689 L$xts_dec_loop6:
1690 .byte 102,15,56,222,209
1691 .byte 102,15,56,222,217
1692 decl %eax
1693 .byte 102,15,56,222,225
1694 .byte 102,15,56,222,233
1695 .byte 102,15,56,222,241
1696 .byte 102,15,56,222,249
1697 L$xts_dec_loop6_enter:
1698 movups 16(%rcx),%xmm1
1699 .byte 102,15,56,222,208
1700 .byte 102,15,56,222,216
1701 leaq 32(%rcx),%rcx
1702 .byte 102,15,56,222,224
1703 .byte 102,15,56,222,232
1704 .byte 102,15,56,222,240
1705 .byte 102,15,56,222,248
1706 movups (%rcx),%xmm0
1707 jnz L$xts_dec_loop6
# Final rounds, interleaved with computing next iteration's tweaks
# xmm10..xmm13 (and advancing xmm15).
1709 pshufd $19,%xmm14,%xmm9
1710 pxor %xmm14,%xmm14
1711 paddq %xmm15,%xmm15
1712 .byte 102,15,56,222,209
1713 pand %xmm8,%xmm9
1714 .byte 102,15,56,222,217
1715 pcmpgtd %xmm15,%xmm14
1716 .byte 102,15,56,222,225
1717 pxor %xmm9,%xmm15
1718 .byte 102,15,56,222,233
1719 .byte 102,15,56,222,241
1720 .byte 102,15,56,222,249
1721 movups 16(%rcx),%xmm1
1723 pshufd $19,%xmm14,%xmm9
1724 pxor %xmm14,%xmm14
1725 movdqa %xmm15,%xmm10
1726 paddq %xmm15,%xmm15
1727 .byte 102,15,56,222,208
1728 pand %xmm8,%xmm9
1729 .byte 102,15,56,222,216
1730 pcmpgtd %xmm15,%xmm14
1731 .byte 102,15,56,222,224
1732 pxor %xmm9,%xmm15
1733 .byte 102,15,56,222,232
1734 .byte 102,15,56,222,240
1735 .byte 102,15,56,222,248
1736 movups 32(%rcx),%xmm0
1738 pshufd $19,%xmm14,%xmm9
1739 pxor %xmm14,%xmm14
1740 movdqa %xmm15,%xmm11
1741 paddq %xmm15,%xmm15
1742 .byte 102,15,56,222,209
1743 pand %xmm8,%xmm9
1744 .byte 102,15,56,222,217
1745 pcmpgtd %xmm15,%xmm14
1746 .byte 102,15,56,222,225
1747 pxor %xmm9,%xmm15
1748 .byte 102,15,56,222,233
1749 .byte 102,15,56,222,241
1750 .byte 102,15,56,222,249
# AESDECLAST (opcode 223) for all 6 blocks.
1752 pshufd $19,%xmm14,%xmm9
1753 pxor %xmm14,%xmm14
1754 movdqa %xmm15,%xmm12
1755 paddq %xmm15,%xmm15
1756 .byte 102,15,56,223,208
1757 pand %xmm8,%xmm9
1758 .byte 102,15,56,223,216
1759 pcmpgtd %xmm15,%xmm14
1760 .byte 102,15,56,223,224
1761 pxor %xmm9,%xmm15
1762 .byte 102,15,56,223,232
1763 .byte 102,15,56,223,240
1764 .byte 102,15,56,223,248
1766 pshufd $19,%xmm14,%xmm9
1767 pxor %xmm14,%xmm14
1768 movdqa %xmm15,%xmm13
1769 paddq %xmm15,%xmm15
# Un-tweak with the spilled tweaks and store the 6 plaintext blocks.
1770 xorps 0(%rsp),%xmm2
1771 pand %xmm8,%xmm9
1772 xorps 16(%rsp),%xmm3
1773 pcmpgtd %xmm15,%xmm14
1774 pxor %xmm9,%xmm15
1776 xorps 32(%rsp),%xmm4
1777 movups %xmm2,0(%rsi)
1778 xorps 48(%rsp),%xmm5
1779 movups %xmm3,16(%rsi)
1780 xorps 64(%rsp),%xmm6
1781 movups %xmm4,32(%rsi)
1782 xorps 80(%rsp),%xmm7
1783 movups %xmm5,48(%rsi)
1784 movl %r10d,%eax
1785 movups %xmm6,64(%rsi)
1786 movups %xmm7,80(%rsi)
1787 leaq 96(%rsi),%rsi
1788 subq $96,%rdx
1789 jnc L$xts_dec_grandloop
# Restore eax = full round count (r10d held rounds*2-1; 2*r10d+3 = rounds+2
# style recovery used by the perlasm — value feeds the 1-block loops below).
1791 leal 3(%rax,%rax,1),%eax
1792 movq %r11,%rcx
1793 movl %eax,%r10d
# Tail dispatch: 0..5 remaining whole blocks.
1795 L$xts_dec_short:
1796 addq $96,%rdx
1797 jz L$xts_dec_done
1799 cmpq $32,%rdx
1800 jb L$xts_dec_one
1801 je L$xts_dec_two
1803 cmpq $64,%rdx
1804 jb L$xts_dec_three
1805 je L$xts_dec_four
# Five blocks remain: extend tweaks and use the 6-wide decrypt helper.
1807 pshufd $19,%xmm14,%xmm9
1808 movdqa %xmm15,%xmm14
1809 paddq %xmm15,%xmm15
1810 movdqu (%rdi),%xmm2
1811 pand %xmm8,%xmm9
1812 movdqu 16(%rdi),%xmm3
1813 pxor %xmm9,%xmm15
1815 movdqu 32(%rdi),%xmm4
1816 pxor %xmm10,%xmm2
1817 movdqu 48(%rdi),%xmm5
1818 pxor %xmm11,%xmm3
1819 movdqu 64(%rdi),%xmm6
1820 leaq 80(%rdi),%rdi
1821 pxor %xmm12,%xmm4
1822 pxor %xmm13,%xmm5
1823 pxor %xmm14,%xmm6
1825 call _aesni_decrypt6
1827 xorps %xmm10,%xmm2
1828 xorps %xmm11,%xmm3
1829 xorps %xmm12,%xmm4
1830 movdqu %xmm2,(%rsi)
1831 xorps %xmm13,%xmm5
1832 movdqu %xmm3,16(%rsi)
1833 xorps %xmm14,%xmm6
1834 movdqu %xmm4,32(%rsi)
1835 pxor %xmm14,%xmm14
1836 movdqu %xmm5,48(%rsi)
1837 pcmpgtd %xmm15,%xmm14
1838 movdqu %xmm6,64(%rsi)
1839 leaq 80(%rsi),%rsi
1840 pshufd $19,%xmm14,%xmm11
1841 andq $15,%r9
1842 jz L$xts_dec_ret
# Prepare xmm10 (tweak n) / xmm11 (tweak n+1) for ciphertext stealing.
1844 movdqa %xmm15,%xmm10
1845 paddq %xmm15,%xmm15
1846 pand %xmm8,%xmm11
1847 pxor %xmm15,%xmm11
1848 jmp L$xts_dec_done2
1850 .p2align 4
# One block: plain single-block AESDEC loop, then rotate tweaks down.
1851 L$xts_dec_one:
1852 movups (%rdi),%xmm2
1853 leaq 16(%rdi),%rdi
1854 xorps %xmm10,%xmm2
1855 movups (%rcx),%xmm0
1856 movups 16(%rcx),%xmm1
1857 leaq 32(%rcx),%rcx
1858 xorps %xmm0,%xmm2
1859 L$oop_dec1_12:
1860 .byte 102,15,56,222,209
1861 decl %eax
1862 movups (%rcx),%xmm1
1863 leaq 16(%rcx),%rcx
1864 jnz L$oop_dec1_12
1865 .byte 102,15,56,223,209
1866 xorps %xmm10,%xmm2
1867 movdqa %xmm11,%xmm10
1868 movups %xmm2,(%rsi)
1869 movdqa %xmm12,%xmm11
1870 leaq 16(%rsi),%rsi
1871 jmp L$xts_dec_done
1873 .p2align 4
1874 L$xts_dec_two:
1875 movups (%rdi),%xmm2
1876 movups 16(%rdi),%xmm3
1877 leaq 32(%rdi),%rdi
1878 xorps %xmm10,%xmm2
1879 xorps %xmm11,%xmm3
1881 call _aesni_decrypt3
1883 xorps %xmm10,%xmm2
1884 movdqa %xmm12,%xmm10
1885 xorps %xmm11,%xmm3
1886 movdqa %xmm13,%xmm11
1887 movups %xmm2,(%rsi)
1888 movups %xmm3,16(%rsi)
1889 leaq 32(%rsi),%rsi
1890 jmp L$xts_dec_done
1892 .p2align 4
1893 L$xts_dec_three:
1894 movups (%rdi),%xmm2
1895 movups 16(%rdi),%xmm3
1896 movups 32(%rdi),%xmm4
1897 leaq 48(%rdi),%rdi
1898 xorps %xmm10,%xmm2
1899 xorps %xmm11,%xmm3
1900 xorps %xmm12,%xmm4
1902 call _aesni_decrypt3
1904 xorps %xmm10,%xmm2
1905 movdqa %xmm13,%xmm10
1906 xorps %xmm11,%xmm3
1907 movdqa %xmm15,%xmm11
1908 xorps %xmm12,%xmm4
1909 movups %xmm2,(%rsi)
1910 movups %xmm3,16(%rsi)
1911 movups %xmm4,32(%rsi)
1912 leaq 48(%rsi),%rsi
1913 jmp L$xts_dec_done
1915 .p2align 4
# Four blocks: needs one extra tweak (xmm15 advanced inline).
1916 L$xts_dec_four:
1917 pshufd $19,%xmm14,%xmm9
1918 movdqa %xmm15,%xmm14
1919 paddq %xmm15,%xmm15
1920 movups (%rdi),%xmm2
1921 pand %xmm8,%xmm9
1922 movups 16(%rdi),%xmm3
1923 pxor %xmm9,%xmm15
1925 movups 32(%rdi),%xmm4
1926 xorps %xmm10,%xmm2
1927 movups 48(%rdi),%xmm5
1928 leaq 64(%rdi),%rdi
1929 xorps %xmm11,%xmm3
1930 xorps %xmm12,%xmm4
1931 xorps %xmm13,%xmm5
1933 call _aesni_decrypt4
1935 xorps %xmm10,%xmm2
1936 movdqa %xmm14,%xmm10
1937 xorps %xmm11,%xmm3
1938 movdqa %xmm15,%xmm11
1939 xorps %xmm12,%xmm4
1940 movups %xmm2,(%rsi)
1941 xorps %xmm13,%xmm5
1942 movups %xmm3,16(%rsi)
1943 movups %xmm4,32(%rsi)
1944 movups %xmm5,48(%rsi)
1945 leaq 64(%rsi),%rsi
1946 jmp L$xts_dec_done
1948 .p2align 4
# Ciphertext stealing (len % 16 != 0): decrypt the LAST full block with the
# next tweak (xmm11), swap tail bytes, then redo the final partial block
# with the previous tweak (xmm10).
1949 L$xts_dec_done:
1950 andq $15,%r9
1951 jz L$xts_dec_ret
1952 L$xts_dec_done2:
1953 movq %r9,%rdx
1954 movq %r11,%rcx
1955 movl %r10d,%eax
1957 movups (%rdi),%xmm2
1958 xorps %xmm11,%xmm2
1959 movups (%rcx),%xmm0
1960 movups 16(%rcx),%xmm1
1961 leaq 32(%rcx),%rcx
1962 xorps %xmm0,%xmm2
1963 L$oop_dec1_13:
1964 .byte 102,15,56,222,209
1965 decl %eax
1966 movups (%rcx),%xmm1
1967 leaq 16(%rcx),%rcx
1968 jnz L$oop_dec1_13
1969 .byte 102,15,56,223,209
1970 xorps %xmm11,%xmm2
1971 movups %xmm2,(%rsi)
# Byte-wise steal: move remaining input bytes up, recycling output bytes.
1973 L$xts_dec_steal:
1974 movzbl 16(%rdi),%eax
1975 movzbl (%rsi),%ecx
1976 leaq 1(%rdi),%rdi
1977 movb %al,(%rsi)
1978 movb %cl,16(%rsi)
1979 leaq 1(%rsi),%rsi
1980 subq $1,%rdx
1981 jnz L$xts_dec_steal
1983 subq %r9,%rsi
1984 movq %r11,%rcx
1985 movl %r10d,%eax
1987 movups (%rsi),%xmm2
1988 xorps %xmm10,%xmm2
1989 movups (%rcx),%xmm0
1990 movups 16(%rcx),%xmm1
1991 leaq 32(%rcx),%rcx
1992 xorps %xmm0,%xmm2
1993 L$oop_dec1_14:
1994 .byte 102,15,56,222,209
1995 decl %eax
1996 movups (%rcx),%xmm1
1997 leaq 16(%rcx),%rcx
1998 jnz L$oop_dec1_14
1999 .byte 102,15,56,223,209
2000 xorps %xmm10,%xmm2
2001 movups %xmm2,(%rsi)
2003 L$xts_dec_ret:
2004 leaq 104(%rsp),%rsp
2005 L$xts_dec_epilogue:
# 0xf3,0xc3 = rep ret (branch-predictor-friendly return encoding).
2006 .byte 0xf3,0xc3
# void aesni_cbc_encrypt(in=%rdi, out=%rsi, len=%rdx, key=%rcx,
#                        ivec=%r8, enc=%r9d (nonzero=encrypt, 0=decrypt))
# AES-CBC, SysV AMD64, AT&T syntax. IV at (%r8) is updated in place on exit.
# Encryption is inherently serial (one block at a time); decryption is
# pipelined 8 blocks wide. ".byte 102,15,56,220/221/222/223,RM" encode
# AESENC/AESENCLAST/AESDEC/AESDECLAST.
2008 .globl _aesni_cbc_encrypt
2010 .p2align 4
2011 _aesni_cbc_encrypt:
2012 testq %rdx,%rdx
2013 jz L$cbc_ret
2015 movl 240(%rcx),%r10d
2016 movq %rcx,%r11
2017 testl %r9d,%r9d
2018 jz L$cbc_decrypt
# --- CBC encrypt: xmm2 carries the chaining value (starts as IV) ---
2020 movups (%r8),%xmm2
2021 movl %r10d,%eax
2022 cmpq $16,%rdx
2023 jb L$cbc_enc_tail
2024 subq $16,%rdx
2025 jmp L$cbc_enc_loop
2026 .p2align 4
2027 L$cbc_enc_loop:
2028 movups (%rdi),%xmm3
2029 leaq 16(%rdi),%rdi
2031 movups (%rcx),%xmm0
2032 movups 16(%rcx),%xmm1
2033 xorps %xmm0,%xmm3
2034 leaq 32(%rcx),%rcx
2035 xorps %xmm3,%xmm2
2036 L$oop_enc1_15:
2037 .byte 102,15,56,220,209
2038 decl %eax
2039 movups (%rcx),%xmm1
2040 leaq 16(%rcx),%rcx
2041 jnz L$oop_enc1_15
2042 .byte 102,15,56,221,209
2043 movl %r10d,%eax
2044 movq %r11,%rcx
2045 movups %xmm2,0(%rsi)
2046 leaq 16(%rsi),%rsi
2047 subq $16,%rdx
2048 jnc L$cbc_enc_loop
2049 addq $16,%rdx
2050 jnz L$cbc_enc_tail
2051 movups %xmm2,(%r8)
2052 jmp L$cbc_ret
# Partial final block: copy rdx input bytes then zero-pad to 16 in the
# output buffer, then loop back once more to encrypt it in place.
# .long 0x9066A4F3 = rep movsb + 2-byte nop; 0x9066AAF3 = rep stosb + nop.
2054 L$cbc_enc_tail:
2055 movq %rdx,%rcx
2056 xchgq %rdi,%rsi
2057 .long 0x9066A4F3
2058 movl $16,%ecx
2059 subq %rdx,%rcx
2060 xorl %eax,%eax
2061 .long 0x9066AAF3
2062 leaq -16(%rdi),%rdi
2063 movl %r10d,%eax
2064 movq %rdi,%rsi
2065 movq %r11,%rcx
2066 xorq %rdx,%rdx
2067 jmp L$cbc_enc_loop
2069 .p2align 4
# --- CBC decrypt: 8-block pipeline; xmm9 carries the previous ciphertext
# (chaining value), spilled to the red zone at -24(%rsp). ---
2070 L$cbc_decrypt:
2071 movups (%r8),%xmm9
2072 movl %r10d,%eax
2073 cmpq $112,%rdx
2074 jbe L$cbc_dec_tail
2075 shrl $1,%r10d
2076 subq $112,%rdx
2077 movl %r10d,%eax
2078 movaps %xmm9,-24(%rsp)
2079 jmp L$cbc_dec_loop8_enter
2080 .p2align 4
2081 L$cbc_dec_loop8:
2082 movaps %xmm0,-24(%rsp)
2083 movups %xmm9,(%rsi)
2084 leaq 16(%rsi),%rsi
2085 L$cbc_dec_loop8_enter:
# Load 8 ciphertext blocks, XOR in round-0 key, start first AESDEC round,
# then tail-call into the shared 8-wide round loop (L$dec_loop8_enter).
2086 movups (%rcx),%xmm0
2087 movups (%rdi),%xmm2
2088 movups 16(%rdi),%xmm3
2089 movups 16(%rcx),%xmm1
2091 leaq 32(%rcx),%rcx
2092 movdqu 32(%rdi),%xmm4
2093 xorps %xmm0,%xmm2
2094 movdqu 48(%rdi),%xmm5
2095 xorps %xmm0,%xmm3
2096 movdqu 64(%rdi),%xmm6
2097 .byte 102,15,56,222,209
2098 pxor %xmm0,%xmm4
2099 movdqu 80(%rdi),%xmm7
2100 .byte 102,15,56,222,217
2101 pxor %xmm0,%xmm5
2102 movdqu 96(%rdi),%xmm8
2103 .byte 102,15,56,222,225
2104 pxor %xmm0,%xmm6
2105 movdqu 112(%rdi),%xmm9
2106 .byte 102,15,56,222,233
2107 pxor %xmm0,%xmm7
2108 decl %eax
2109 .byte 102,15,56,222,241
2110 pxor %xmm0,%xmm8
2111 .byte 102,15,56,222,249
2112 pxor %xmm0,%xmm9
2113 movups (%rcx),%xmm0
2114 .byte 102,68,15,56,222,193
2115 .byte 102,68,15,56,222,201
2116 movups 16(%rcx),%xmm1
2118 call L$dec_loop8_enter
# CBC un-chain: XOR each plaintext with the preceding ciphertext block
# (saved chain value for block 0, then in[0..6] for blocks 1..7).
2120 movups (%rdi),%xmm1
2121 movups 16(%rdi),%xmm0
2122 xorps -24(%rsp),%xmm2
2123 xorps %xmm1,%xmm3
2124 movups 32(%rdi),%xmm1
2125 xorps %xmm0,%xmm4
2126 movups 48(%rdi),%xmm0
2127 xorps %xmm1,%xmm5
2128 movups 64(%rdi),%xmm1
2129 xorps %xmm0,%xmm6
2130 movups 80(%rdi),%xmm0
2131 xorps %xmm1,%xmm7
2132 movups 96(%rdi),%xmm1
2133 xorps %xmm0,%xmm8
2134 movups 112(%rdi),%xmm0
2135 xorps %xmm1,%xmm9
2136 movups %xmm2,(%rsi)
2137 movups %xmm3,16(%rsi)
2138 movups %xmm4,32(%rsi)
2139 movups %xmm5,48(%rsi)
2140 movl %r10d,%eax
2141 movups %xmm6,64(%rsi)
2142 movq %r11,%rcx
2143 movups %xmm7,80(%rsi)
2144 leaq 128(%rdi),%rdi
2145 movups %xmm8,96(%rsi)
2146 leaq 112(%rsi),%rsi
2147 subq $128,%rdx
2148 ja L$cbc_dec_loop8
2150 movaps %xmm9,%xmm2
2151 movaps %xmm0,%xmm9
2152 addq $112,%rdx
2153 jle L$cbc_dec_tail_collected
2154 movups %xmm2,(%rsi)
2155 leal 1(%r10,%r10,1),%eax
2156 leaq 16(%rsi),%rsi
# Tail: 1..7 blocks. Inputs loaded into xmm2.. with copies kept (xmm8/7/6..)
# so the un-chaining values survive the decrypt helpers.
2157 L$cbc_dec_tail:
2158 movups (%rdi),%xmm2
2159 movaps %xmm2,%xmm8
2160 cmpq $16,%rdx
2161 jbe L$cbc_dec_one
2163 movups 16(%rdi),%xmm3
2164 movaps %xmm3,%xmm7
2165 cmpq $32,%rdx
2166 jbe L$cbc_dec_two
2168 movups 32(%rdi),%xmm4
2169 movaps %xmm4,%xmm6
2170 cmpq $48,%rdx
2171 jbe L$cbc_dec_three
2173 movups 48(%rdi),%xmm5
2174 cmpq $64,%rdx
2175 jbe L$cbc_dec_four
2177 movups 64(%rdi),%xmm6
2178 cmpq $80,%rdx
2179 jbe L$cbc_dec_five
2181 movups 80(%rdi),%xmm7
2182 cmpq $96,%rdx
2183 jbe L$cbc_dec_six
2185 movups 96(%rdi),%xmm8
2186 movaps %xmm9,-24(%rsp)
2187 call _aesni_decrypt8
2188 movups (%rdi),%xmm1
2189 movups 16(%rdi),%xmm0
2190 xorps -24(%rsp),%xmm2
2191 xorps %xmm1,%xmm3
2192 movups 32(%rdi),%xmm1
2193 xorps %xmm0,%xmm4
2194 movups 48(%rdi),%xmm0
2195 xorps %xmm1,%xmm5
2196 movups 64(%rdi),%xmm1
2197 xorps %xmm0,%xmm6
2198 movups 80(%rdi),%xmm0
2199 xorps %xmm1,%xmm7
2200 movups 96(%rdi),%xmm9
2201 xorps %xmm0,%xmm8
2202 movups %xmm2,(%rsi)
2203 movups %xmm3,16(%rsi)
2204 movups %xmm4,32(%rsi)
2205 movups %xmm5,48(%rsi)
2206 movups %xmm6,64(%rsi)
2207 movups %xmm7,80(%rsi)
2208 leaq 96(%rsi),%rsi
2209 movaps %xmm8,%xmm2
2210 subq $112,%rdx
2211 jmp L$cbc_dec_tail_collected
2212 .p2align 4
2213 L$cbc_dec_one:
2214 movups (%rcx),%xmm0
2215 movups 16(%rcx),%xmm1
2216 leaq 32(%rcx),%rcx
2217 xorps %xmm0,%xmm2
2218 L$oop_dec1_16:
2219 .byte 102,15,56,222,209
2220 decl %eax
2221 movups (%rcx),%xmm1
2222 leaq 16(%rcx),%rcx
2223 jnz L$oop_dec1_16
2224 .byte 102,15,56,223,209
2225 xorps %xmm9,%xmm2
2226 movaps %xmm8,%xmm9
2227 subq $16,%rdx
2228 jmp L$cbc_dec_tail_collected
2229 .p2align 4
2230 L$cbc_dec_two:
2231 xorps %xmm4,%xmm4
2232 call _aesni_decrypt3
2233 xorps %xmm9,%xmm2
2234 xorps %xmm8,%xmm3
2235 movups %xmm2,(%rsi)
2236 movaps %xmm7,%xmm9
2237 movaps %xmm3,%xmm2
2238 leaq 16(%rsi),%rsi
2239 subq $32,%rdx
2240 jmp L$cbc_dec_tail_collected
2241 .p2align 4
2242 L$cbc_dec_three:
2243 call _aesni_decrypt3
2244 xorps %xmm9,%xmm2
2245 xorps %xmm8,%xmm3
2246 movups %xmm2,(%rsi)
2247 xorps %xmm7,%xmm4
2248 movups %xmm3,16(%rsi)
2249 movaps %xmm6,%xmm9
2250 movaps %xmm4,%xmm2
2251 leaq 32(%rsi),%rsi
2252 subq $48,%rdx
2253 jmp L$cbc_dec_tail_collected
2254 .p2align 4
2255 L$cbc_dec_four:
2256 call _aesni_decrypt4
2257 xorps %xmm9,%xmm2
2258 movups 48(%rdi),%xmm9
2259 xorps %xmm8,%xmm3
2260 movups %xmm2,(%rsi)
2261 xorps %xmm7,%xmm4
2262 movups %xmm3,16(%rsi)
2263 xorps %xmm6,%xmm5
2264 movups %xmm4,32(%rsi)
2265 movaps %xmm5,%xmm2
2266 leaq 48(%rsi),%rsi
2267 subq $64,%rdx
2268 jmp L$cbc_dec_tail_collected
2269 .p2align 4
2270 L$cbc_dec_five:
2271 xorps %xmm7,%xmm7
2272 call _aesni_decrypt6
2273 movups 16(%rdi),%xmm1
2274 movups 32(%rdi),%xmm0
2275 xorps %xmm9,%xmm2
2276 xorps %xmm8,%xmm3
2277 xorps %xmm1,%xmm4
2278 movups 48(%rdi),%xmm1
2279 xorps %xmm0,%xmm5
2280 movups 64(%rdi),%xmm9
2281 xorps %xmm1,%xmm6
2282 movups %xmm2,(%rsi)
2283 movups %xmm3,16(%rsi)
2284 movups %xmm4,32(%rsi)
2285 movups %xmm5,48(%rsi)
2286 leaq 64(%rsi),%rsi
2287 movaps %xmm6,%xmm2
2288 subq $80,%rdx
2289 jmp L$cbc_dec_tail_collected
2290 .p2align 4
2291 L$cbc_dec_six:
2292 call _aesni_decrypt6
2293 movups 16(%rdi),%xmm1
2294 movups 32(%rdi),%xmm0
2295 xorps %xmm9,%xmm2
2296 xorps %xmm8,%xmm3
2297 xorps %xmm1,%xmm4
2298 movups 48(%rdi),%xmm1
2299 xorps %xmm0,%xmm5
2300 movups 64(%rdi),%xmm0
2301 xorps %xmm1,%xmm6
2302 movups 80(%rdi),%xmm9
2303 xorps %xmm0,%xmm7
2304 movups %xmm2,(%rsi)
2305 movups %xmm3,16(%rsi)
2306 movups %xmm4,32(%rsi)
2307 movups %xmm5,48(%rsi)
2308 movups %xmm6,64(%rsi)
2309 leaq 80(%rsi),%rsi
2310 movaps %xmm7,%xmm2
2311 subq $96,%rdx
2312 jmp L$cbc_dec_tail_collected
2313 .p2align 4
# Store the updated IV; write the last plaintext block (whole, or via
# red-zone bounce + rep movsb for a partial final block).
2314 L$cbc_dec_tail_collected:
2315 andq $15,%rdx
2316 movups %xmm9,(%r8)
2317 jnz L$cbc_dec_tail_partial
2318 movups %xmm2,(%rsi)
2319 jmp L$cbc_dec_ret
2320 .p2align 4
2321 L$cbc_dec_tail_partial:
2322 movaps %xmm2,-24(%rsp)
2323 movq $16,%rcx
2324 movq %rsi,%rdi
2325 subq %rdx,%rcx
2326 leaq -24(%rsp),%rsi
2327 .long 0x9066A4F3
2329 L$cbc_dec_ret:
2330 L$cbc_ret:
# 0xf3,0xc3 = rep ret.
2331 .byte 0xf3,0xc3
# int aesni_set_decrypt_key(userKey=%rdi, bits=%esi, key=%rdx)
# Builds the encryption schedule, then converts it in place to a decryption
# schedule: reverse the order of the round keys and apply AESIMC
# (.byte 102,15,56,219,RM) to every round key except the first and last.
# Returns 0 in %eax on success, error code from __aesni_set_encrypt_key
# otherwise.
2333 .globl _aesni_set_decrypt_key
2335 .p2align 4
2336 _aesni_set_decrypt_key:
# 0x48,0x83,0xEC,0x08 = subq $8,%rsp (raw bytes for old-assembler compat).
2337 .byte 0x48,0x83,0xEC,0x08
2338 call __aesni_set_encrypt_key
2339 shll $4,%esi
2340 testl %eax,%eax
2341 jnz L$dec_key_ret
# rdi -> last round key (rdx + 16*rounds); swap first/last untouched.
2342 leaq 16(%rdx,%rsi,1),%rdi
2344 movups (%rdx),%xmm0
2345 movups (%rdi),%xmm1
2346 movups %xmm0,(%rdi)
2347 movups %xmm1,(%rdx)
2348 leaq 16(%rdx),%rdx
2349 leaq -16(%rdi),%rdi
# Walk toward the middle: swap round keys pairwise, applying AESIMC to both.
2351 L$dec_key_inverse:
2352 movups (%rdx),%xmm0
2353 movups (%rdi),%xmm1
2354 .byte 102,15,56,219,192
2355 .byte 102,15,56,219,201
2356 leaq 16(%rdx),%rdx
2357 leaq -16(%rdi),%rdi
2358 movups %xmm0,16(%rdi)
2359 movups %xmm1,-16(%rdx)
2360 cmpq %rdx,%rdi
2361 ja L$dec_key_inverse
# Middle round key (odd count): AESIMC in place.
2363 movups (%rdx),%xmm0
2364 .byte 102,15,56,219,192
2365 movups %xmm0,(%rdi)
2366 L$dec_key_ret:
2367 addq $8,%rsp
2368 .byte 0xf3,0xc3
2369 L$SEH_end_set_decrypt_key:
# int aesni_set_encrypt_key(userKey=%rdi, bits=%esi, key=%rdx)
# Expands an AES user key into the round-key schedule at (%rdx) using
# AESKEYGENASSIST (.byte 102,15,58,223,RM,rcon). The round count is stored
# after the schedule (offset 240 from the start; written as 80/48/16(%rax)
# relative to the advanced pointer). Returns 0 on success, -1 for NULL
# arguments, -2 for unsupported key sizes.
2371 .globl _aesni_set_encrypt_key
2373 .p2align 4
2374 _aesni_set_encrypt_key:
2375 __aesni_set_encrypt_key:
# 0x48,0x83,0xEC,0x08 = subq $8,%rsp.
2376 .byte 0x48,0x83,0xEC,0x08
2377 movq $-1,%rax
2378 testq %rdi,%rdi
2379 jz L$enc_key_ret
2380 testq %rdx,%rdx
2381 jz L$enc_key_ret
2383 movups (%rdi),%xmm0
2384 xorps %xmm4,%xmm4
2385 leaq 16(%rdx),%rax
2386 cmpl $256,%esi
2387 je L$14rounds
2388 cmpl $192,%esi
2389 je L$12rounds
2390 cmpl $128,%esi
2391 jne L$bad_keybits
# AES-128: 10 rounds; one keygenassist per round, rcon doubling 1..0x36.
2393 L$10rounds:
2394 movl $9,%esi
2395 movups %xmm0,(%rdx)
2396 .byte 102,15,58,223,200,1
2397 call L$key_expansion_128_cold
2398 .byte 102,15,58,223,200,2
2399 call L$key_expansion_128
2400 .byte 102,15,58,223,200,4
2401 call L$key_expansion_128
2402 .byte 102,15,58,223,200,8
2403 call L$key_expansion_128
2404 .byte 102,15,58,223,200,16
2405 call L$key_expansion_128
2406 .byte 102,15,58,223,200,32
2407 call L$key_expansion_128
2408 .byte 102,15,58,223,200,64
2409 call L$key_expansion_128
2410 .byte 102,15,58,223,200,128
2411 call L$key_expansion_128
2412 .byte 102,15,58,223,200,27
2413 call L$key_expansion_128
2414 .byte 102,15,58,223,200,54
2415 call L$key_expansion_128
2416 movups %xmm0,(%rax)
2417 movl %esi,80(%rax)
2418 xorl %eax,%eax
2419 jmp L$enc_key_ret
2421 .p2align 4
# AES-192: 12 rounds; alternating 192a/192b expansion steps.
2422 L$12rounds:
2423 movq 16(%rdi),%xmm2
2424 movl $11,%esi
2425 movups %xmm0,(%rdx)
2426 .byte 102,15,58,223,202,1
2427 call L$key_expansion_192a_cold
2428 .byte 102,15,58,223,202,2
2429 call L$key_expansion_192b
2430 .byte 102,15,58,223,202,4
2431 call L$key_expansion_192a
2432 .byte 102,15,58,223,202,8
2433 call L$key_expansion_192b
2434 .byte 102,15,58,223,202,16
2435 call L$key_expansion_192a
2436 .byte 102,15,58,223,202,32
2437 call L$key_expansion_192b
2438 .byte 102,15,58,223,202,64
2439 call L$key_expansion_192a
2440 .byte 102,15,58,223,202,128
2441 call L$key_expansion_192b
2442 movups %xmm0,(%rax)
2443 movl %esi,48(%rax)
2444 xorq %rax,%rax
2445 jmp L$enc_key_ret
2447 .p2align 4
# AES-256: 14 rounds; alternating 256a (rcon) / 256b (no rcon) steps.
2448 L$14rounds:
2449 movups 16(%rdi),%xmm2
2450 movl $13,%esi
2451 leaq 16(%rax),%rax
2452 movups %xmm0,(%rdx)
2453 movups %xmm2,16(%rdx)
2454 .byte 102,15,58,223,202,1
2455 call L$key_expansion_256a_cold
2456 .byte 102,15,58,223,200,1
2457 call L$key_expansion_256b
2458 .byte 102,15,58,223,202,2
2459 call L$key_expansion_256a
2460 .byte 102,15,58,223,200,2
2461 call L$key_expansion_256b
2462 .byte 102,15,58,223,202,4
2463 call L$key_expansion_256a
2464 .byte 102,15,58,223,200,4
2465 call L$key_expansion_256b
2466 .byte 102,15,58,223,202,8
2467 call L$key_expansion_256a
2468 .byte 102,15,58,223,200,8
2469 call L$key_expansion_256b
2470 .byte 102,15,58,223,202,16
2471 call L$key_expansion_256a
2472 .byte 102,15,58,223,200,16
2473 call L$key_expansion_256b
2474 .byte 102,15,58,223,202,32
2475 call L$key_expansion_256a
2476 .byte 102,15,58,223,200,32
2477 call L$key_expansion_256b
2478 .byte 102,15,58,223,202,64
2479 call L$key_expansion_256a
2480 movups %xmm0,(%rax)
2481 movl %esi,16(%rax)
2482 xorq %rax,%rax
2483 jmp L$enc_key_ret
2485 .p2align 4
2486 L$bad_keybits:
2487 movq $-2,%rax
2488 L$enc_key_ret:
2489 addq $8,%rsp
2490 .byte 0xf3,0xc3
2491 L$SEH_end_set_encrypt_key:
# AES-128 key-expansion step. In: xmm0 = previous round key, xmm1 =
# AESKEYGENASSIST result, xmm4 = 0 (scratch), rax = output cursor.
# Stores the previous key, then folds the rotated subword into xmm0 via the
# shufps/xorps cascade (equivalent to the FIPS-197 word-chaining XORs).
# Entry at _cold skips the store for the very first step.
2493 .p2align 4
2494 L$key_expansion_128:
2495 movups %xmm0,(%rax)
2496 leaq 16(%rax),%rax
2497 L$key_expansion_128_cold:
2498 shufps $16,%xmm0,%xmm4
2499 xorps %xmm4,%xmm0
2500 shufps $140,%xmm0,%xmm4
2501 xorps %xmm4,%xmm0
2502 shufps $255,%xmm1,%xmm1
2503 xorps %xmm1,%xmm0
2504 .byte 0xf3,0xc3
# AES-192 key-expansion step "a". In: xmm0/xmm2 = 256 bits of current key
# material (xmm2 low 64 bits significant), xmm1 = keygenassist result,
# xmm4 = scratch, rax = output cursor. Saves xmm2 for the "b" step in xmm5,
# then updates xmm0 and the 64-bit tail in xmm2 (_warm is the shared core,
# also reached from L$key_expansion_192b).
2506 .p2align 4
2507 L$key_expansion_192a:
2508 movups %xmm0,(%rax)
2509 leaq 16(%rax),%rax
2510 L$key_expansion_192a_cold:
2511 movaps %xmm2,%xmm5
2512 L$key_expansion_192b_warm:
2513 shufps $16,%xmm0,%xmm4
2514 movdqa %xmm2,%xmm3
2515 xorps %xmm4,%xmm0
2516 shufps $140,%xmm0,%xmm4
2517 pslldq $4,%xmm3
2518 xorps %xmm4,%xmm0
2519 pshufd $85,%xmm1,%xmm1
2520 pxor %xmm3,%xmm2
2521 pxor %xmm1,%xmm0
2522 pshufd $255,%xmm0,%xmm3
2523 pxor %xmm3,%xmm2
2524 .byte 0xf3,0xc3
# AES-192 key-expansion step "b": emits 32 bytes of schedule, packing the
# saved 64-bit tail (xmm5, from step "a") with halves of xmm0/xmm2, then
# falls through into the shared _warm core to advance the key state.
2526 .p2align 4
2527 L$key_expansion_192b:
2528 movaps %xmm0,%xmm3
2529 shufps $68,%xmm0,%xmm5
2530 movups %xmm5,(%rax)
2531 shufps $78,%xmm2,%xmm3
2532 movups %xmm3,16(%rax)
2533 leaq 32(%rax),%rax
2534 jmp L$key_expansion_192b_warm
# AES-256 key-expansion step "a": stores the odd round key (xmm2) and
# chains the keygenassist result into xmm0 (same cascade as the 128-bit
# step). Entry at _cold skips the store for the first step.
2536 .p2align 4
2537 L$key_expansion_256a:
2538 movups %xmm2,(%rax)
2539 leaq 16(%rax),%rax
2540 L$key_expansion_256a_cold:
2541 shufps $16,%xmm0,%xmm4
2542 xorps %xmm4,%xmm0
2543 shufps $140,%xmm0,%xmm4
2544 xorps %xmm4,%xmm0
2545 shufps $255,%xmm1,%xmm1
2546 xorps %xmm1,%xmm0
2547 .byte 0xf3,0xc3
# AES-256 key-expansion step "b": stores the even round key (xmm0) and
# chains into xmm2. Note shufps $170 (broadcast lane 2, SubWord without
# RotWord) versus $255 in step "a" — per FIPS-197 for the 256-bit schedule.
2549 .p2align 4
2550 L$key_expansion_256b:
2551 movups %xmm0,(%rax)
2552 leaq 16(%rax),%rax
2554 shufps $16,%xmm2,%xmm4
2555 xorps %xmm4,%xmm2
2556 shufps $140,%xmm2,%xmm4
2557 xorps %xmm4,%xmm2
2558 shufps $170,%xmm1,%xmm1
2559 xorps %xmm1,%xmm2
2560 .byte 0xf3,0xc3
# Constant pool (64-byte aligned).
2563 .p2align 6
# PSHUFB mask: full 16-byte byte reversal (big<->little endian swap).
2564 L$bswap_mask:
2565 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# Counter-mode increment constants.
2566 L$increment32:
2567 .long 6,6,6,0
2568 L$increment64:
2569 .long 1,0,0,0
# XTS GF(2^128) reduction constant: 0x87 feedback plus carry-select lanes.
2570 L$xts_magic:
2571 .long 0x87,0,1,0
# NUL-terminated builder tag: "AES for Intel AES-NI, CRYPTOGAMS by
# <appro@openssl.org>".
2573 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2574 .p2align 6