# [gnutls.git] lib/accelerated/x86/coff/appro-aes-x86-64-coff.s
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain copyright notices,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# * Neither the name of the Andy Polyakov nor the names of its
# copyright holder and contributors may be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *** This file is auto-generated ***
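#
# Notes for readers of this listing:
#
# - This is Andy Polyakov's AES-NI x86-64 module from OpenSSL
#   (aesni-x86_64.pl) generated for Win64/COFF.  The public entry points
#   take their arguments in %rcx, %rdx, %r8, %r9 and on the stack at
#   40(%rsp)/48(%rsp) per the Microsoft x64 ABI, save the non-volatile
#   %rdi/%rsi in the caller's shadow space at 8(%rsp)/16(%rsp), and then
#   shuffle the arguments into the %rdi/%rsi/%rdx/%rcx layout the
#   routine bodies expect.
# - %xmm6-%xmm15 are non-volatile on Win64, so routines that use them
#   spill them to the stack on entry and restore them before returning.
# - Sequences such as ".byte 102,15,56,220,209" are hand-encoded AES-NI
#   instructions (102,15,56,220 = aesenc, ...,221 = aesenclast,
#   ...,222 = aesdec, ...,223 = aesdeclast, 102,15,56,219 = aesimc,
#   102,15,58,223 = aeskeygenassist), kept as raw bytes so the file
#   assembles with toolchains that lack the mnemonics.  ".byte 0xf3,0xc3"
#   is "rep ret".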
40 .text
41 .globl aesni_encrypt
42 .def aesni_encrypt; .scl 2; .type 32; .endef
43 .p2align 4
44 aesni_encrypt:
45 movups (%rcx),%xmm2
46 movl 240(%r8),%eax
47 movups (%r8),%xmm0
48 movups 16(%r8),%xmm1
49 leaq 32(%r8),%r8
50 xorps %xmm0,%xmm2
51 .Loop_enc1_1:
52 .byte 102,15,56,220,209
53 decl %eax
54 movups (%r8),%xmm1
55 leaq 16(%r8),%r8
56 jnz .Loop_enc1_1
57 .byte 102,15,56,221,209
58 movups %xmm2,(%rdx)
59 .byte 0xf3,0xc3
62 .globl aesni_decrypt
63 .def aesni_decrypt; .scl 2; .type 32; .endef
64 .p2align 4
65 aesni_decrypt:
66 movups (%rcx),%xmm2
67 movl 240(%r8),%eax
68 movups (%r8),%xmm0
69 movups 16(%r8),%xmm1
70 leaq 32(%r8),%r8
71 xorps %xmm0,%xmm2
72 .Loop_dec1_2:
73 .byte 102,15,56,222,209
74 decl %eax
75 movups (%r8),%xmm1
76 leaq 16(%r8),%r8
77 jnz .Loop_dec1_2
78 .byte 102,15,56,223,209
79 movups %xmm2,(%rdx)
80 .byte 0xf3,0xc3
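#
# The local _aesni_{en,de}crypt{3,4,6,8} helpers below keep up to eight
# blocks (%xmm2-%xmm9) in flight to hide aesenc/aesdec latency.  They
# expect the key schedule in %rcx and the round count (240(key)) in
# %eax; each loop iteration applies two rounds, hence the shrl $1,%eax
# on entry.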
82 .def _aesni_encrypt3; .scl 3; .type 32; .endef
83 .p2align 4
84 _aesni_encrypt3:
85 movups (%rcx),%xmm0
86 shrl $1,%eax
87 movups 16(%rcx),%xmm1
88 leaq 32(%rcx),%rcx
89 xorps %xmm0,%xmm2
90 xorps %xmm0,%xmm3
91 xorps %xmm0,%xmm4
92 movups (%rcx),%xmm0
94 .Lenc_loop3:
95 .byte 102,15,56,220,209
96 .byte 102,15,56,220,217
97 decl %eax
98 .byte 102,15,56,220,225
99 movups 16(%rcx),%xmm1
100 .byte 102,15,56,220,208
101 .byte 102,15,56,220,216
102 leaq 32(%rcx),%rcx
103 .byte 102,15,56,220,224
104 movups (%rcx),%xmm0
105 jnz .Lenc_loop3
107 .byte 102,15,56,220,209
108 .byte 102,15,56,220,217
109 .byte 102,15,56,220,225
110 .byte 102,15,56,221,208
111 .byte 102,15,56,221,216
112 .byte 102,15,56,221,224
113 .byte 0xf3,0xc3
115 .def _aesni_decrypt3; .scl 3; .type 32; .endef
116 .p2align 4
117 _aesni_decrypt3:
118 movups (%rcx),%xmm0
119 shrl $1,%eax
120 movups 16(%rcx),%xmm1
121 leaq 32(%rcx),%rcx
122 xorps %xmm0,%xmm2
123 xorps %xmm0,%xmm3
124 xorps %xmm0,%xmm4
125 movups (%rcx),%xmm0
127 .Ldec_loop3:
128 .byte 102,15,56,222,209
129 .byte 102,15,56,222,217
130 decl %eax
131 .byte 102,15,56,222,225
132 movups 16(%rcx),%xmm1
133 .byte 102,15,56,222,208
134 .byte 102,15,56,222,216
135 leaq 32(%rcx),%rcx
136 .byte 102,15,56,222,224
137 movups (%rcx),%xmm0
138 jnz .Ldec_loop3
140 .byte 102,15,56,222,209
141 .byte 102,15,56,222,217
142 .byte 102,15,56,222,225
143 .byte 102,15,56,223,208
144 .byte 102,15,56,223,216
145 .byte 102,15,56,223,224
146 .byte 0xf3,0xc3
148 .def _aesni_encrypt4; .scl 3; .type 32; .endef
149 .p2align 4
150 _aesni_encrypt4:
151 movups (%rcx),%xmm0
152 shrl $1,%eax
153 movups 16(%rcx),%xmm1
154 leaq 32(%rcx),%rcx
155 xorps %xmm0,%xmm2
156 xorps %xmm0,%xmm3
157 xorps %xmm0,%xmm4
158 xorps %xmm0,%xmm5
159 movups (%rcx),%xmm0
161 .Lenc_loop4:
162 .byte 102,15,56,220,209
163 .byte 102,15,56,220,217
164 decl %eax
165 .byte 102,15,56,220,225
166 .byte 102,15,56,220,233
167 movups 16(%rcx),%xmm1
168 .byte 102,15,56,220,208
169 .byte 102,15,56,220,216
170 leaq 32(%rcx),%rcx
171 .byte 102,15,56,220,224
172 .byte 102,15,56,220,232
173 movups (%rcx),%xmm0
174 jnz .Lenc_loop4
176 .byte 102,15,56,220,209
177 .byte 102,15,56,220,217
178 .byte 102,15,56,220,225
179 .byte 102,15,56,220,233
180 .byte 102,15,56,221,208
181 .byte 102,15,56,221,216
182 .byte 102,15,56,221,224
183 .byte 102,15,56,221,232
184 .byte 0xf3,0xc3
186 .def _aesni_decrypt4; .scl 3; .type 32; .endef
187 .p2align 4
188 _aesni_decrypt4:
189 movups (%rcx),%xmm0
190 shrl $1,%eax
191 movups 16(%rcx),%xmm1
192 leaq 32(%rcx),%rcx
193 xorps %xmm0,%xmm2
194 xorps %xmm0,%xmm3
195 xorps %xmm0,%xmm4
196 xorps %xmm0,%xmm5
197 movups (%rcx),%xmm0
199 .Ldec_loop4:
200 .byte 102,15,56,222,209
201 .byte 102,15,56,222,217
202 decl %eax
203 .byte 102,15,56,222,225
204 .byte 102,15,56,222,233
205 movups 16(%rcx),%xmm1
206 .byte 102,15,56,222,208
207 .byte 102,15,56,222,216
208 leaq 32(%rcx),%rcx
209 .byte 102,15,56,222,224
210 .byte 102,15,56,222,232
211 movups (%rcx),%xmm0
212 jnz .Ldec_loop4
214 .byte 102,15,56,222,209
215 .byte 102,15,56,222,217
216 .byte 102,15,56,222,225
217 .byte 102,15,56,222,233
218 .byte 102,15,56,223,208
219 .byte 102,15,56,223,216
220 .byte 102,15,56,223,224
221 .byte 102,15,56,223,232
222 .byte 0xf3,0xc3
224 .def _aesni_encrypt6; .scl 3; .type 32; .endef
225 .p2align 4
226 _aesni_encrypt6:
227 movups (%rcx),%xmm0
228 shrl $1,%eax
229 movups 16(%rcx),%xmm1
230 leaq 32(%rcx),%rcx
231 xorps %xmm0,%xmm2
232 pxor %xmm0,%xmm3
233 .byte 102,15,56,220,209
234 pxor %xmm0,%xmm4
235 .byte 102,15,56,220,217
236 pxor %xmm0,%xmm5
237 .byte 102,15,56,220,225
238 pxor %xmm0,%xmm6
239 .byte 102,15,56,220,233
240 pxor %xmm0,%xmm7
241 decl %eax
242 .byte 102,15,56,220,241
243 movups (%rcx),%xmm0
244 .byte 102,15,56,220,249
245 jmp .Lenc_loop6_enter
246 .p2align 4
247 .Lenc_loop6:
248 .byte 102,15,56,220,209
249 .byte 102,15,56,220,217
250 decl %eax
251 .byte 102,15,56,220,225
252 .byte 102,15,56,220,233
253 .byte 102,15,56,220,241
254 .byte 102,15,56,220,249
255 .Lenc_loop6_enter:
256 movups 16(%rcx),%xmm1
257 .byte 102,15,56,220,208
258 .byte 102,15,56,220,216
259 leaq 32(%rcx),%rcx
260 .byte 102,15,56,220,224
261 .byte 102,15,56,220,232
262 .byte 102,15,56,220,240
263 .byte 102,15,56,220,248
264 movups (%rcx),%xmm0
265 jnz .Lenc_loop6
267 .byte 102,15,56,220,209
268 .byte 102,15,56,220,217
269 .byte 102,15,56,220,225
270 .byte 102,15,56,220,233
271 .byte 102,15,56,220,241
272 .byte 102,15,56,220,249
273 .byte 102,15,56,221,208
274 .byte 102,15,56,221,216
275 .byte 102,15,56,221,224
276 .byte 102,15,56,221,232
277 .byte 102,15,56,221,240
278 .byte 102,15,56,221,248
279 .byte 0xf3,0xc3
281 .def _aesni_decrypt6; .scl 3; .type 32; .endef
282 .p2align 4
283 _aesni_decrypt6:
284 movups (%rcx),%xmm0
285 shrl $1,%eax
286 movups 16(%rcx),%xmm1
287 leaq 32(%rcx),%rcx
288 xorps %xmm0,%xmm2
289 pxor %xmm0,%xmm3
290 .byte 102,15,56,222,209
291 pxor %xmm0,%xmm4
292 .byte 102,15,56,222,217
293 pxor %xmm0,%xmm5
294 .byte 102,15,56,222,225
295 pxor %xmm0,%xmm6
296 .byte 102,15,56,222,233
297 pxor %xmm0,%xmm7
298 decl %eax
299 .byte 102,15,56,222,241
300 movups (%rcx),%xmm0
301 .byte 102,15,56,222,249
302 jmp .Ldec_loop6_enter
303 .p2align 4
304 .Ldec_loop6:
305 .byte 102,15,56,222,209
306 .byte 102,15,56,222,217
307 decl %eax
308 .byte 102,15,56,222,225
309 .byte 102,15,56,222,233
310 .byte 102,15,56,222,241
311 .byte 102,15,56,222,249
312 .Ldec_loop6_enter:
313 movups 16(%rcx),%xmm1
314 .byte 102,15,56,222,208
315 .byte 102,15,56,222,216
316 leaq 32(%rcx),%rcx
317 .byte 102,15,56,222,224
318 .byte 102,15,56,222,232
319 .byte 102,15,56,222,240
320 .byte 102,15,56,222,248
321 movups (%rcx),%xmm0
322 jnz .Ldec_loop6
324 .byte 102,15,56,222,209
325 .byte 102,15,56,222,217
326 .byte 102,15,56,222,225
327 .byte 102,15,56,222,233
328 .byte 102,15,56,222,241
329 .byte 102,15,56,222,249
330 .byte 102,15,56,223,208
331 .byte 102,15,56,223,216
332 .byte 102,15,56,223,224
333 .byte 102,15,56,223,232
334 .byte 102,15,56,223,240
335 .byte 102,15,56,223,248
336 .byte 0xf3,0xc3
338 .def _aesni_encrypt8; .scl 3; .type 32; .endef
339 .p2align 4
340 _aesni_encrypt8:
341 movups (%rcx),%xmm0
342 shrl $1,%eax
343 movups 16(%rcx),%xmm1
344 leaq 32(%rcx),%rcx
345 xorps %xmm0,%xmm2
346 xorps %xmm0,%xmm3
347 .byte 102,15,56,220,209
348 pxor %xmm0,%xmm4
349 .byte 102,15,56,220,217
350 pxor %xmm0,%xmm5
351 .byte 102,15,56,220,225
352 pxor %xmm0,%xmm6
353 .byte 102,15,56,220,233
354 pxor %xmm0,%xmm7
355 decl %eax
356 .byte 102,15,56,220,241
357 pxor %xmm0,%xmm8
358 .byte 102,15,56,220,249
359 pxor %xmm0,%xmm9
360 movups (%rcx),%xmm0
361 .byte 102,68,15,56,220,193
362 .byte 102,68,15,56,220,201
363 movups 16(%rcx),%xmm1
364 jmp .Lenc_loop8_enter
365 .p2align 4
366 .Lenc_loop8:
367 .byte 102,15,56,220,209
368 .byte 102,15,56,220,217
369 decl %eax
370 .byte 102,15,56,220,225
371 .byte 102,15,56,220,233
372 .byte 102,15,56,220,241
373 .byte 102,15,56,220,249
374 .byte 102,68,15,56,220,193
375 .byte 102,68,15,56,220,201
376 movups 16(%rcx),%xmm1
377 .Lenc_loop8_enter:
378 .byte 102,15,56,220,208
379 .byte 102,15,56,220,216
380 leaq 32(%rcx),%rcx
381 .byte 102,15,56,220,224
382 .byte 102,15,56,220,232
383 .byte 102,15,56,220,240
384 .byte 102,15,56,220,248
385 .byte 102,68,15,56,220,192
386 .byte 102,68,15,56,220,200
387 movups (%rcx),%xmm0
388 jnz .Lenc_loop8
390 .byte 102,15,56,220,209
391 .byte 102,15,56,220,217
392 .byte 102,15,56,220,225
393 .byte 102,15,56,220,233
394 .byte 102,15,56,220,241
395 .byte 102,15,56,220,249
396 .byte 102,68,15,56,220,193
397 .byte 102,68,15,56,220,201
398 .byte 102,15,56,221,208
399 .byte 102,15,56,221,216
400 .byte 102,15,56,221,224
401 .byte 102,15,56,221,232
402 .byte 102,15,56,221,240
403 .byte 102,15,56,221,248
404 .byte 102,68,15,56,221,192
405 .byte 102,68,15,56,221,200
406 .byte 0xf3,0xc3
408 .def _aesni_decrypt8; .scl 3; .type 32; .endef
409 .p2align 4
410 _aesni_decrypt8:
411 movups (%rcx),%xmm0
412 shrl $1,%eax
413 movups 16(%rcx),%xmm1
414 leaq 32(%rcx),%rcx
415 xorps %xmm0,%xmm2
416 xorps %xmm0,%xmm3
417 .byte 102,15,56,222,209
418 pxor %xmm0,%xmm4
419 .byte 102,15,56,222,217
420 pxor %xmm0,%xmm5
421 .byte 102,15,56,222,225
422 pxor %xmm0,%xmm6
423 .byte 102,15,56,222,233
424 pxor %xmm0,%xmm7
425 decl %eax
426 .byte 102,15,56,222,241
427 pxor %xmm0,%xmm8
428 .byte 102,15,56,222,249
429 pxor %xmm0,%xmm9
430 movups (%rcx),%xmm0
431 .byte 102,68,15,56,222,193
432 .byte 102,68,15,56,222,201
433 movups 16(%rcx),%xmm1
434 jmp .Ldec_loop8_enter
435 .p2align 4
436 .Ldec_loop8:
437 .byte 102,15,56,222,209
438 .byte 102,15,56,222,217
439 decl %eax
440 .byte 102,15,56,222,225
441 .byte 102,15,56,222,233
442 .byte 102,15,56,222,241
443 .byte 102,15,56,222,249
444 .byte 102,68,15,56,222,193
445 .byte 102,68,15,56,222,201
446 movups 16(%rcx),%xmm1
447 .Ldec_loop8_enter:
448 .byte 102,15,56,222,208
449 .byte 102,15,56,222,216
450 leaq 32(%rcx),%rcx
451 .byte 102,15,56,222,224
452 .byte 102,15,56,222,232
453 .byte 102,15,56,222,240
454 .byte 102,15,56,222,248
455 .byte 102,68,15,56,222,192
456 .byte 102,68,15,56,222,200
457 movups (%rcx),%xmm0
458 jnz .Ldec_loop8
460 .byte 102,15,56,222,209
461 .byte 102,15,56,222,217
462 .byte 102,15,56,222,225
463 .byte 102,15,56,222,233
464 .byte 102,15,56,222,241
465 .byte 102,15,56,222,249
466 .byte 102,68,15,56,222,193
467 .byte 102,68,15,56,222,201
468 .byte 102,15,56,223,208
469 .byte 102,15,56,223,216
470 .byte 102,15,56,223,224
471 .byte 102,15,56,223,232
472 .byte 102,15,56,223,240
473 .byte 102,15,56,223,248
474 .byte 102,68,15,56,223,192
475 .byte 102,68,15,56,223,200
476 .byte 0xf3,0xc3
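#
# ECB: the bulk path runs eight blocks per iteration through
# _aesni_encrypt8/_aesni_decrypt8, then the tail code dispatches on the
# remaining 1-7 blocks.  The .LSEH_begin_*/.LSEH_end_* labels bracket
# each public routine for the Win64 SEH/unwind bookkeeping.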
478 .globl aesni_ecb_encrypt
479 .def aesni_ecb_encrypt; .scl 2; .type 32; .endef
480 .p2align 4
481 aesni_ecb_encrypt:
482 movq %rdi,8(%rsp)
483 movq %rsi,16(%rsp)
484 movq %rsp,%rax
485 .LSEH_begin_aesni_ecb_encrypt:
486 movq %rcx,%rdi
487 movq %rdx,%rsi
488 movq %r8,%rdx
489 movq %r9,%rcx
490 movq 40(%rsp),%r8
492 andq $-16,%rdx
493 jz .Lecb_ret
495 movl 240(%rcx),%eax
496 movups (%rcx),%xmm0
497 movq %rcx,%r11
498 movl %eax,%r10d
499 testl %r8d,%r8d
500 jz .Lecb_decrypt
502 cmpq $128,%rdx
503 jb .Lecb_enc_tail
505 movdqu (%rdi),%xmm2
506 movdqu 16(%rdi),%xmm3
507 movdqu 32(%rdi),%xmm4
508 movdqu 48(%rdi),%xmm5
509 movdqu 64(%rdi),%xmm6
510 movdqu 80(%rdi),%xmm7
511 movdqu 96(%rdi),%xmm8
512 movdqu 112(%rdi),%xmm9
513 leaq 128(%rdi),%rdi
514 subq $128,%rdx
515 jmp .Lecb_enc_loop8_enter
516 .p2align 4
517 .Lecb_enc_loop8:
518 movups %xmm2,(%rsi)
519 movq %r11,%rcx
520 movdqu (%rdi),%xmm2
521 movl %r10d,%eax
522 movups %xmm3,16(%rsi)
523 movdqu 16(%rdi),%xmm3
524 movups %xmm4,32(%rsi)
525 movdqu 32(%rdi),%xmm4
526 movups %xmm5,48(%rsi)
527 movdqu 48(%rdi),%xmm5
528 movups %xmm6,64(%rsi)
529 movdqu 64(%rdi),%xmm6
530 movups %xmm7,80(%rsi)
531 movdqu 80(%rdi),%xmm7
532 movups %xmm8,96(%rsi)
533 movdqu 96(%rdi),%xmm8
534 movups %xmm9,112(%rsi)
535 leaq 128(%rsi),%rsi
536 movdqu 112(%rdi),%xmm9
537 leaq 128(%rdi),%rdi
538 .Lecb_enc_loop8_enter:
540 call _aesni_encrypt8
542 subq $128,%rdx
543 jnc .Lecb_enc_loop8
545 movups %xmm2,(%rsi)
546 movq %r11,%rcx
547 movups %xmm3,16(%rsi)
548 movl %r10d,%eax
549 movups %xmm4,32(%rsi)
550 movups %xmm5,48(%rsi)
551 movups %xmm6,64(%rsi)
552 movups %xmm7,80(%rsi)
553 movups %xmm8,96(%rsi)
554 movups %xmm9,112(%rsi)
555 leaq 128(%rsi),%rsi
556 addq $128,%rdx
557 jz .Lecb_ret
559 .Lecb_enc_tail:
560 movups (%rdi),%xmm2
561 cmpq $32,%rdx
562 jb .Lecb_enc_one
563 movups 16(%rdi),%xmm3
564 je .Lecb_enc_two
565 movups 32(%rdi),%xmm4
566 cmpq $64,%rdx
567 jb .Lecb_enc_three
568 movups 48(%rdi),%xmm5
569 je .Lecb_enc_four
570 movups 64(%rdi),%xmm6
571 cmpq $96,%rdx
572 jb .Lecb_enc_five
573 movups 80(%rdi),%xmm7
574 je .Lecb_enc_six
575 movdqu 96(%rdi),%xmm8
576 call _aesni_encrypt8
577 movups %xmm2,(%rsi)
578 movups %xmm3,16(%rsi)
579 movups %xmm4,32(%rsi)
580 movups %xmm5,48(%rsi)
581 movups %xmm6,64(%rsi)
582 movups %xmm7,80(%rsi)
583 movups %xmm8,96(%rsi)
584 jmp .Lecb_ret
585 .p2align 4
586 .Lecb_enc_one:
587 movups (%rcx),%xmm0
588 movups 16(%rcx),%xmm1
589 leaq 32(%rcx),%rcx
590 xorps %xmm0,%xmm2
591 .Loop_enc1_3:
592 .byte 102,15,56,220,209
593 decl %eax
594 movups (%rcx),%xmm1
595 leaq 16(%rcx),%rcx
596 jnz .Loop_enc1_3
597 .byte 102,15,56,221,209
598 movups %xmm2,(%rsi)
599 jmp .Lecb_ret
600 .p2align 4
601 .Lecb_enc_two:
602 xorps %xmm4,%xmm4
603 call _aesni_encrypt3
604 movups %xmm2,(%rsi)
605 movups %xmm3,16(%rsi)
606 jmp .Lecb_ret
607 .p2align 4
608 .Lecb_enc_three:
609 call _aesni_encrypt3
610 movups %xmm2,(%rsi)
611 movups %xmm3,16(%rsi)
612 movups %xmm4,32(%rsi)
613 jmp .Lecb_ret
614 .p2align 4
615 .Lecb_enc_four:
616 call _aesni_encrypt4
617 movups %xmm2,(%rsi)
618 movups %xmm3,16(%rsi)
619 movups %xmm4,32(%rsi)
620 movups %xmm5,48(%rsi)
621 jmp .Lecb_ret
622 .p2align 4
623 .Lecb_enc_five:
624 xorps %xmm7,%xmm7
625 call _aesni_encrypt6
626 movups %xmm2,(%rsi)
627 movups %xmm3,16(%rsi)
628 movups %xmm4,32(%rsi)
629 movups %xmm5,48(%rsi)
630 movups %xmm6,64(%rsi)
631 jmp .Lecb_ret
632 .p2align 4
633 .Lecb_enc_six:
634 call _aesni_encrypt6
635 movups %xmm2,(%rsi)
636 movups %xmm3,16(%rsi)
637 movups %xmm4,32(%rsi)
638 movups %xmm5,48(%rsi)
639 movups %xmm6,64(%rsi)
640 movups %xmm7,80(%rsi)
641 jmp .Lecb_ret
643 .p2align 4
644 .Lecb_decrypt:
645 cmpq $128,%rdx
646 jb .Lecb_dec_tail
648 movdqu (%rdi),%xmm2
649 movdqu 16(%rdi),%xmm3
650 movdqu 32(%rdi),%xmm4
651 movdqu 48(%rdi),%xmm5
652 movdqu 64(%rdi),%xmm6
653 movdqu 80(%rdi),%xmm7
654 movdqu 96(%rdi),%xmm8
655 movdqu 112(%rdi),%xmm9
656 leaq 128(%rdi),%rdi
657 subq $128,%rdx
658 jmp .Lecb_dec_loop8_enter
659 .p2align 4
660 .Lecb_dec_loop8:
661 movups %xmm2,(%rsi)
662 movq %r11,%rcx
663 movdqu (%rdi),%xmm2
664 movl %r10d,%eax
665 movups %xmm3,16(%rsi)
666 movdqu 16(%rdi),%xmm3
667 movups %xmm4,32(%rsi)
668 movdqu 32(%rdi),%xmm4
669 movups %xmm5,48(%rsi)
670 movdqu 48(%rdi),%xmm5
671 movups %xmm6,64(%rsi)
672 movdqu 64(%rdi),%xmm6
673 movups %xmm7,80(%rsi)
674 movdqu 80(%rdi),%xmm7
675 movups %xmm8,96(%rsi)
676 movdqu 96(%rdi),%xmm8
677 movups %xmm9,112(%rsi)
678 leaq 128(%rsi),%rsi
679 movdqu 112(%rdi),%xmm9
680 leaq 128(%rdi),%rdi
681 .Lecb_dec_loop8_enter:
683 call _aesni_decrypt8
685 movups (%r11),%xmm0
686 subq $128,%rdx
687 jnc .Lecb_dec_loop8
689 movups %xmm2,(%rsi)
690 movq %r11,%rcx
691 movups %xmm3,16(%rsi)
692 movl %r10d,%eax
693 movups %xmm4,32(%rsi)
694 movups %xmm5,48(%rsi)
695 movups %xmm6,64(%rsi)
696 movups %xmm7,80(%rsi)
697 movups %xmm8,96(%rsi)
698 movups %xmm9,112(%rsi)
699 leaq 128(%rsi),%rsi
700 addq $128,%rdx
701 jz .Lecb_ret
703 .Lecb_dec_tail:
704 movups (%rdi),%xmm2
705 cmpq $32,%rdx
706 jb .Lecb_dec_one
707 movups 16(%rdi),%xmm3
708 je .Lecb_dec_two
709 movups 32(%rdi),%xmm4
710 cmpq $64,%rdx
711 jb .Lecb_dec_three
712 movups 48(%rdi),%xmm5
713 je .Lecb_dec_four
714 movups 64(%rdi),%xmm6
715 cmpq $96,%rdx
716 jb .Lecb_dec_five
717 movups 80(%rdi),%xmm7
718 je .Lecb_dec_six
719 movups 96(%rdi),%xmm8
720 movups (%rcx),%xmm0
721 call _aesni_decrypt8
722 movups %xmm2,(%rsi)
723 movups %xmm3,16(%rsi)
724 movups %xmm4,32(%rsi)
725 movups %xmm5,48(%rsi)
726 movups %xmm6,64(%rsi)
727 movups %xmm7,80(%rsi)
728 movups %xmm8,96(%rsi)
729 jmp .Lecb_ret
730 .p2align 4
731 .Lecb_dec_one:
732 movups (%rcx),%xmm0
733 movups 16(%rcx),%xmm1
734 leaq 32(%rcx),%rcx
735 xorps %xmm0,%xmm2
736 .Loop_dec1_4:
737 .byte 102,15,56,222,209
738 decl %eax
739 movups (%rcx),%xmm1
740 leaq 16(%rcx),%rcx
741 jnz .Loop_dec1_4
742 .byte 102,15,56,223,209
743 movups %xmm2,(%rsi)
744 jmp .Lecb_ret
745 .p2align 4
746 .Lecb_dec_two:
747 xorps %xmm4,%xmm4
748 call _aesni_decrypt3
749 movups %xmm2,(%rsi)
750 movups %xmm3,16(%rsi)
751 jmp .Lecb_ret
752 .p2align 4
753 .Lecb_dec_three:
754 call _aesni_decrypt3
755 movups %xmm2,(%rsi)
756 movups %xmm3,16(%rsi)
757 movups %xmm4,32(%rsi)
758 jmp .Lecb_ret
759 .p2align 4
760 .Lecb_dec_four:
761 call _aesni_decrypt4
762 movups %xmm2,(%rsi)
763 movups %xmm3,16(%rsi)
764 movups %xmm4,32(%rsi)
765 movups %xmm5,48(%rsi)
766 jmp .Lecb_ret
767 .p2align 4
768 .Lecb_dec_five:
769 xorps %xmm7,%xmm7
770 call _aesni_decrypt6
771 movups %xmm2,(%rsi)
772 movups %xmm3,16(%rsi)
773 movups %xmm4,32(%rsi)
774 movups %xmm5,48(%rsi)
775 movups %xmm6,64(%rsi)
776 jmp .Lecb_ret
777 .p2align 4
778 .Lecb_dec_six:
779 call _aesni_decrypt6
780 movups %xmm2,(%rsi)
781 movups %xmm3,16(%rsi)
782 movups %xmm4,32(%rsi)
783 movups %xmm5,48(%rsi)
784 movups %xmm6,64(%rsi)
785 movups %xmm7,80(%rsi)
787 .Lecb_ret:
788 movq 8(%rsp),%rdi
789 movq 16(%rsp),%rsi
790 .byte 0xf3,0xc3
791 .LSEH_end_aesni_ecb_encrypt:
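#
# CCM64: each outer iteration interleaves one counter-mode encryption
# with the CBC-MAC update.  %xmm9 holds the counter block (stepped via
# .Lincrement64 in %xmm6 and byte-swapped through .Lbswap_mask in
# %xmm7), %xmm3 the running MAC and %xmm8 the current data block.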
792 .globl aesni_ccm64_encrypt_blocks
793 .def aesni_ccm64_encrypt_blocks; .scl 2; .type 32; .endef
794 .p2align 4
795 aesni_ccm64_encrypt_blocks:
796 movq %rdi,8(%rsp)
797 movq %rsi,16(%rsp)
798 movq %rsp,%rax
799 .LSEH_begin_aesni_ccm64_encrypt_blocks:
800 movq %rcx,%rdi
801 movq %rdx,%rsi
802 movq %r8,%rdx
803 movq %r9,%rcx
804 movq 40(%rsp),%r8
805 movq 48(%rsp),%r9
807 leaq -88(%rsp),%rsp
808 movaps %xmm6,(%rsp)
809 movaps %xmm7,16(%rsp)
810 movaps %xmm8,32(%rsp)
811 movaps %xmm9,48(%rsp)
812 .Lccm64_enc_body:
813 movl 240(%rcx),%eax
814 movdqu (%r8),%xmm9
815 movdqa .Lincrement64(%rip),%xmm6
816 movdqa .Lbswap_mask(%rip),%xmm7
818 shrl $1,%eax
819 leaq 0(%rcx),%r11
820 movdqu (%r9),%xmm3
821 movdqa %xmm9,%xmm2
822 movl %eax,%r10d
823 .byte 102,68,15,56,0,207
824 jmp .Lccm64_enc_outer
825 .p2align 4
826 .Lccm64_enc_outer:
827 movups (%r11),%xmm0
828 movl %r10d,%eax
829 movups (%rdi),%xmm8
831 xorps %xmm0,%xmm2
832 movups 16(%r11),%xmm1
833 xorps %xmm8,%xmm0
834 leaq 32(%r11),%rcx
835 xorps %xmm0,%xmm3
836 movups (%rcx),%xmm0
838 .Lccm64_enc2_loop:
839 .byte 102,15,56,220,209
840 decl %eax
841 .byte 102,15,56,220,217
842 movups 16(%rcx),%xmm1
843 .byte 102,15,56,220,208
844 leaq 32(%rcx),%rcx
845 .byte 102,15,56,220,216
846 movups 0(%rcx),%xmm0
847 jnz .Lccm64_enc2_loop
848 .byte 102,15,56,220,209
849 .byte 102,15,56,220,217
850 paddq %xmm6,%xmm9
851 .byte 102,15,56,221,208
852 .byte 102,15,56,221,216
854 decq %rdx
855 leaq 16(%rdi),%rdi
856 xorps %xmm2,%xmm8
857 movdqa %xmm9,%xmm2
858 movups %xmm8,(%rsi)
859 leaq 16(%rsi),%rsi
860 .byte 102,15,56,0,215
861 jnz .Lccm64_enc_outer
863 movups %xmm3,(%r9)
864 movaps (%rsp),%xmm6
865 movaps 16(%rsp),%xmm7
866 movaps 32(%rsp),%xmm8
867 movaps 48(%rsp),%xmm9
868 leaq 88(%rsp),%rsp
869 .Lccm64_enc_ret:
870 movq 8(%rsp),%rdi
871 movq 16(%rsp),%rsi
872 .byte 0xf3,0xc3
873 .LSEH_end_aesni_ccm64_encrypt_blocks:
874 .globl aesni_ccm64_decrypt_blocks
875 .def aesni_ccm64_decrypt_blocks; .scl 2; .type 32; .endef
876 .p2align 4
877 aesni_ccm64_decrypt_blocks:
878 movq %rdi,8(%rsp)
879 movq %rsi,16(%rsp)
880 movq %rsp,%rax
881 .LSEH_begin_aesni_ccm64_decrypt_blocks:
882 movq %rcx,%rdi
883 movq %rdx,%rsi
884 movq %r8,%rdx
885 movq %r9,%rcx
886 movq 40(%rsp),%r8
887 movq 48(%rsp),%r9
889 leaq -88(%rsp),%rsp
890 movaps %xmm6,(%rsp)
891 movaps %xmm7,16(%rsp)
892 movaps %xmm8,32(%rsp)
893 movaps %xmm9,48(%rsp)
894 .Lccm64_dec_body:
895 movl 240(%rcx),%eax
896 movups (%r8),%xmm9
897 movdqu (%r9),%xmm3
898 movdqa .Lincrement64(%rip),%xmm6
899 movdqa .Lbswap_mask(%rip),%xmm7
901 movaps %xmm9,%xmm2
902 movl %eax,%r10d
903 movq %rcx,%r11
904 .byte 102,68,15,56,0,207
905 movups (%rcx),%xmm0
906 movups 16(%rcx),%xmm1
907 leaq 32(%rcx),%rcx
908 xorps %xmm0,%xmm2
909 .Loop_enc1_5:
910 .byte 102,15,56,220,209
911 decl %eax
912 movups (%rcx),%xmm1
913 leaq 16(%rcx),%rcx
914 jnz .Loop_enc1_5
915 .byte 102,15,56,221,209
916 movups (%rdi),%xmm8
917 paddq %xmm6,%xmm9
918 leaq 16(%rdi),%rdi
919 jmp .Lccm64_dec_outer
920 .p2align 4
921 .Lccm64_dec_outer:
922 xorps %xmm2,%xmm8
923 movdqa %xmm9,%xmm2
924 movl %r10d,%eax
925 movups %xmm8,(%rsi)
926 leaq 16(%rsi),%rsi
927 .byte 102,15,56,0,215
929 subq $1,%rdx
930 jz .Lccm64_dec_break
932 movups (%r11),%xmm0
933 shrl $1,%eax
934 movups 16(%r11),%xmm1
935 xorps %xmm0,%xmm8
936 leaq 32(%r11),%rcx
937 xorps %xmm0,%xmm2
938 xorps %xmm8,%xmm3
939 movups (%rcx),%xmm0
941 .Lccm64_dec2_loop:
942 .byte 102,15,56,220,209
943 decl %eax
944 .byte 102,15,56,220,217
945 movups 16(%rcx),%xmm1
946 .byte 102,15,56,220,208
947 leaq 32(%rcx),%rcx
948 .byte 102,15,56,220,216
949 movups 0(%rcx),%xmm0
950 jnz .Lccm64_dec2_loop
951 movups (%rdi),%xmm8
952 paddq %xmm6,%xmm9
953 .byte 102,15,56,220,209
954 .byte 102,15,56,220,217
955 leaq 16(%rdi),%rdi
956 .byte 102,15,56,221,208
957 .byte 102,15,56,221,216
958 jmp .Lccm64_dec_outer
960 .p2align 4
961 .Lccm64_dec_break:
963 movups (%r11),%xmm0
964 movups 16(%r11),%xmm1
965 xorps %xmm0,%xmm8
966 leaq 32(%r11),%r11
967 xorps %xmm8,%xmm3
968 .Loop_enc1_6:
969 .byte 102,15,56,220,217
970 decl %eax
971 movups (%r11),%xmm1
972 leaq 16(%r11),%r11
973 jnz .Loop_enc1_6
974 .byte 102,15,56,221,217
975 movups %xmm3,(%r9)
976 movaps (%rsp),%xmm6
977 movaps 16(%rsp),%xmm7
978 movaps 32(%rsp),%xmm8
979 movaps 48(%rsp),%xmm9
980 leaq 88(%rsp),%rsp
981 .Lccm64_dec_ret:
982 movq 8(%rsp),%rdi
983 movq 16(%rsp),%rsi
984 .byte 0xf3,0xc3
985 .LSEH_end_aesni_ccm64_decrypt_blocks:
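#
# CTR32: only the low 32 bits of the counter are incremented.  The
# counter words are moved between GPRs and XMM registers with
# pextrd/pinsrd (the .byte sequences starting 102,69,15,58 and
# 102,68,15,58), six blocks are pipelined per .Lctr32_loop6 iteration,
# and .Lincrement32 steps the packed counters kept on the stack.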
986 .globl aesni_ctr32_encrypt_blocks
987 .def aesni_ctr32_encrypt_blocks; .scl 2; .type 32; .endef
988 .p2align 4
989 aesni_ctr32_encrypt_blocks:
990 movq %rdi,8(%rsp)
991 movq %rsi,16(%rsp)
992 movq %rsp,%rax
993 .LSEH_begin_aesni_ctr32_encrypt_blocks:
994 movq %rcx,%rdi
995 movq %rdx,%rsi
996 movq %r8,%rdx
997 movq %r9,%rcx
998 movq 40(%rsp),%r8
1000 leaq -200(%rsp),%rsp
1001 movaps %xmm6,32(%rsp)
1002 movaps %xmm7,48(%rsp)
1003 movaps %xmm8,64(%rsp)
1004 movaps %xmm9,80(%rsp)
1005 movaps %xmm10,96(%rsp)
1006 movaps %xmm11,112(%rsp)
1007 movaps %xmm12,128(%rsp)
1008 movaps %xmm13,144(%rsp)
1009 movaps %xmm14,160(%rsp)
1010 movaps %xmm15,176(%rsp)
1011 .Lctr32_body:
1012 cmpq $1,%rdx
1013 je .Lctr32_one_shortcut
1015 movdqu (%r8),%xmm14
1016 movdqa .Lbswap_mask(%rip),%xmm15
1017 xorl %eax,%eax
1018 .byte 102,69,15,58,22,242,3
1019 .byte 102,68,15,58,34,240,3
1021 movl 240(%rcx),%eax
1022 bswapl %r10d
1023 pxor %xmm12,%xmm12
1024 pxor %xmm13,%xmm13
1025 .byte 102,69,15,58,34,226,0
1026 leaq 3(%r10),%r11
1027 .byte 102,69,15,58,34,235,0
1028 incl %r10d
1029 .byte 102,69,15,58,34,226,1
1030 incq %r11
1031 .byte 102,69,15,58,34,235,1
1032 incl %r10d
1033 .byte 102,69,15,58,34,226,2
1034 incq %r11
1035 .byte 102,69,15,58,34,235,2
1036 movdqa %xmm12,0(%rsp)
1037 .byte 102,69,15,56,0,231
1038 movdqa %xmm13,16(%rsp)
1039 .byte 102,69,15,56,0,239
1041 pshufd $192,%xmm12,%xmm2
1042 pshufd $128,%xmm12,%xmm3
1043 pshufd $64,%xmm12,%xmm4
1044 cmpq $6,%rdx
1045 jb .Lctr32_tail
1046 shrl $1,%eax
1047 movq %rcx,%r11
1048 movl %eax,%r10d
1049 subq $6,%rdx
1050 jmp .Lctr32_loop6
1052 .p2align 4
1053 .Lctr32_loop6:
1054 pshufd $192,%xmm13,%xmm5
1055 por %xmm14,%xmm2
1056 movups (%r11),%xmm0
1057 pshufd $128,%xmm13,%xmm6
1058 por %xmm14,%xmm3
1059 movups 16(%r11),%xmm1
1060 pshufd $64,%xmm13,%xmm7
1061 por %xmm14,%xmm4
1062 por %xmm14,%xmm5
1063 xorps %xmm0,%xmm2
1064 por %xmm14,%xmm6
1065 por %xmm14,%xmm7
1070 pxor %xmm0,%xmm3
1071 .byte 102,15,56,220,209
1072 leaq 32(%r11),%rcx
1073 pxor %xmm0,%xmm4
1074 .byte 102,15,56,220,217
1075 movdqa .Lincrement32(%rip),%xmm13
1076 pxor %xmm0,%xmm5
1077 .byte 102,15,56,220,225
1078 movdqa 0(%rsp),%xmm12
1079 pxor %xmm0,%xmm6
1080 .byte 102,15,56,220,233
1081 pxor %xmm0,%xmm7
1082 movups (%rcx),%xmm0
1083 decl %eax
1084 .byte 102,15,56,220,241
1085 .byte 102,15,56,220,249
1086 jmp .Lctr32_enc_loop6_enter
1087 .p2align 4
1088 .Lctr32_enc_loop6:
1089 .byte 102,15,56,220,209
1090 .byte 102,15,56,220,217
1091 decl %eax
1092 .byte 102,15,56,220,225
1093 .byte 102,15,56,220,233
1094 .byte 102,15,56,220,241
1095 .byte 102,15,56,220,249
1096 .Lctr32_enc_loop6_enter:
1097 movups 16(%rcx),%xmm1
1098 .byte 102,15,56,220,208
1099 .byte 102,15,56,220,216
1100 leaq 32(%rcx),%rcx
1101 .byte 102,15,56,220,224
1102 .byte 102,15,56,220,232
1103 .byte 102,15,56,220,240
1104 .byte 102,15,56,220,248
1105 movups (%rcx),%xmm0
1106 jnz .Lctr32_enc_loop6
1108 .byte 102,15,56,220,209
1109 paddd %xmm13,%xmm12
1110 .byte 102,15,56,220,217
1111 paddd 16(%rsp),%xmm13
1112 .byte 102,15,56,220,225
1113 movdqa %xmm12,0(%rsp)
1114 .byte 102,15,56,220,233
1115 movdqa %xmm13,16(%rsp)
1116 .byte 102,15,56,220,241
1117 .byte 102,69,15,56,0,231
1118 .byte 102,15,56,220,249
1119 .byte 102,69,15,56,0,239
1121 .byte 102,15,56,221,208
1122 movups (%rdi),%xmm8
1123 .byte 102,15,56,221,216
1124 movups 16(%rdi),%xmm9
1125 .byte 102,15,56,221,224
1126 movups 32(%rdi),%xmm10
1127 .byte 102,15,56,221,232
1128 movups 48(%rdi),%xmm11
1129 .byte 102,15,56,221,240
1130 movups 64(%rdi),%xmm1
1131 .byte 102,15,56,221,248
1132 movups 80(%rdi),%xmm0
1133 leaq 96(%rdi),%rdi
1135 xorps %xmm2,%xmm8
1136 pshufd $192,%xmm12,%xmm2
1137 xorps %xmm3,%xmm9
1138 pshufd $128,%xmm12,%xmm3
1139 movups %xmm8,(%rsi)
1140 xorps %xmm4,%xmm10
1141 pshufd $64,%xmm12,%xmm4
1142 movups %xmm9,16(%rsi)
1143 xorps %xmm5,%xmm11
1144 movups %xmm10,32(%rsi)
1145 xorps %xmm6,%xmm1
1146 movups %xmm11,48(%rsi)
1147 xorps %xmm7,%xmm0
1148 movups %xmm1,64(%rsi)
1149 movups %xmm0,80(%rsi)
1150 leaq 96(%rsi),%rsi
1151 movl %r10d,%eax
1152 subq $6,%rdx
1153 jnc .Lctr32_loop6
1155 addq $6,%rdx
1156 jz .Lctr32_done
1157 movq %r11,%rcx
1158 leal 1(%rax,%rax,1),%eax
1160 .Lctr32_tail:
1161 por %xmm14,%xmm2
1162 movups (%rdi),%xmm8
1163 cmpq $2,%rdx
1164 jb .Lctr32_one
1166 por %xmm14,%xmm3
1167 movups 16(%rdi),%xmm9
1168 je .Lctr32_two
1170 pshufd $192,%xmm13,%xmm5
1171 por %xmm14,%xmm4
1172 movups 32(%rdi),%xmm10
1173 cmpq $4,%rdx
1174 jb .Lctr32_three
1176 pshufd $128,%xmm13,%xmm6
1177 por %xmm14,%xmm5
1178 movups 48(%rdi),%xmm11
1179 je .Lctr32_four
1181 por %xmm14,%xmm6
1182 xorps %xmm7,%xmm7
1184 call _aesni_encrypt6
1186 movups 64(%rdi),%xmm1
1187 xorps %xmm2,%xmm8
1188 xorps %xmm3,%xmm9
1189 movups %xmm8,(%rsi)
1190 xorps %xmm4,%xmm10
1191 movups %xmm9,16(%rsi)
1192 xorps %xmm5,%xmm11
1193 movups %xmm10,32(%rsi)
1194 xorps %xmm6,%xmm1
1195 movups %xmm11,48(%rsi)
1196 movups %xmm1,64(%rsi)
1197 jmp .Lctr32_done
1199 .p2align 4
1200 .Lctr32_one_shortcut:
1201 movups (%r8),%xmm2
1202 movups (%rdi),%xmm8
1203 movl 240(%rcx),%eax
1204 .Lctr32_one:
1205 movups (%rcx),%xmm0
1206 movups 16(%rcx),%xmm1
1207 leaq 32(%rcx),%rcx
1208 xorps %xmm0,%xmm2
1209 .Loop_enc1_7:
1210 .byte 102,15,56,220,209
1211 decl %eax
1212 movups (%rcx),%xmm1
1213 leaq 16(%rcx),%rcx
1214 jnz .Loop_enc1_7
1215 .byte 102,15,56,221,209
1216 xorps %xmm2,%xmm8
1217 movups %xmm8,(%rsi)
1218 jmp .Lctr32_done
1220 .p2align 4
1221 .Lctr32_two:
1222 xorps %xmm4,%xmm4
1223 call _aesni_encrypt3
1224 xorps %xmm2,%xmm8
1225 xorps %xmm3,%xmm9
1226 movups %xmm8,(%rsi)
1227 movups %xmm9,16(%rsi)
1228 jmp .Lctr32_done
1230 .p2align 4
1231 .Lctr32_three:
1232 call _aesni_encrypt3
1233 xorps %xmm2,%xmm8
1234 xorps %xmm3,%xmm9
1235 movups %xmm8,(%rsi)
1236 xorps %xmm4,%xmm10
1237 movups %xmm9,16(%rsi)
1238 movups %xmm10,32(%rsi)
1239 jmp .Lctr32_done
1241 .p2align 4
1242 .Lctr32_four:
1243 call _aesni_encrypt4
1244 xorps %xmm2,%xmm8
1245 xorps %xmm3,%xmm9
1246 movups %xmm8,(%rsi)
1247 xorps %xmm4,%xmm10
1248 movups %xmm9,16(%rsi)
1249 xorps %xmm5,%xmm11
1250 movups %xmm10,32(%rsi)
1251 movups %xmm11,48(%rsi)
1253 .Lctr32_done:
1254 movaps 32(%rsp),%xmm6
1255 movaps 48(%rsp),%xmm7
1256 movaps 64(%rsp),%xmm8
1257 movaps 80(%rsp),%xmm9
1258 movaps 96(%rsp),%xmm10
1259 movaps 112(%rsp),%xmm11
1260 movaps 128(%rsp),%xmm12
1261 movaps 144(%rsp),%xmm13
1262 movaps 160(%rsp),%xmm14
1263 movaps 176(%rsp),%xmm15
1264 leaq 200(%rsp),%rsp
1265 .Lctr32_ret:
1266 movq 8(%rsp),%rdi
1267 movq 16(%rsp),%rsi
1268 .byte 0xf3,0xc3
1269 .LSEH_end_aesni_ctr32_encrypt_blocks:
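#
# XTS encryption: the tweak lives in %xmm15 and is multiplied by x in
# GF(2^128) with the pshufd $19 / pand .Lxts_magic / pxor carry
# sequence; tweaks for six consecutive blocks are staged in
# %xmm10-%xmm15 and at 0-80(%rsp).  A trailing partial block is handled
# by ciphertext stealing at .Lxts_enc_steal.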
1270 .globl aesni_xts_encrypt
1271 .def aesni_xts_encrypt; .scl 2; .type 32; .endef
1272 .p2align 4
1273 aesni_xts_encrypt:
1274 movq %rdi,8(%rsp)
1275 movq %rsi,16(%rsp)
1276 movq %rsp,%rax
1277 .LSEH_begin_aesni_xts_encrypt:
1278 movq %rcx,%rdi
1279 movq %rdx,%rsi
1280 movq %r8,%rdx
1281 movq %r9,%rcx
1282 movq 40(%rsp),%r8
1283 movq 48(%rsp),%r9
1285 leaq -264(%rsp),%rsp
1286 movaps %xmm6,96(%rsp)
1287 movaps %xmm7,112(%rsp)
1288 movaps %xmm8,128(%rsp)
1289 movaps %xmm9,144(%rsp)
1290 movaps %xmm10,160(%rsp)
1291 movaps %xmm11,176(%rsp)
1292 movaps %xmm12,192(%rsp)
1293 movaps %xmm13,208(%rsp)
1294 movaps %xmm14,224(%rsp)
1295 movaps %xmm15,240(%rsp)
1296 .Lxts_enc_body:
1297 movups (%r9),%xmm15
1298 movl 240(%r8),%eax
1299 movl 240(%rcx),%r10d
1300 movups (%r8),%xmm0
1301 movups 16(%r8),%xmm1
1302 leaq 32(%r8),%r8
1303 xorps %xmm0,%xmm15
1304 .Loop_enc1_8:
1305 .byte 102,68,15,56,220,249
1306 decl %eax
1307 movups (%r8),%xmm1
1308 leaq 16(%r8),%r8
1309 jnz .Loop_enc1_8
1310 .byte 102,68,15,56,221,249
1311 movq %rcx,%r11
1312 movl %r10d,%eax
1313 movq %rdx,%r9
1314 andq $-16,%rdx
1316 movdqa .Lxts_magic(%rip),%xmm8
1317 pxor %xmm14,%xmm14
1318 pcmpgtd %xmm15,%xmm14
1319 pshufd $19,%xmm14,%xmm9
1320 pxor %xmm14,%xmm14
1321 movdqa %xmm15,%xmm10
1322 paddq %xmm15,%xmm15
1323 pand %xmm8,%xmm9
1324 pcmpgtd %xmm15,%xmm14
1325 pxor %xmm9,%xmm15
1326 pshufd $19,%xmm14,%xmm9
1327 pxor %xmm14,%xmm14
1328 movdqa %xmm15,%xmm11
1329 paddq %xmm15,%xmm15
1330 pand %xmm8,%xmm9
1331 pcmpgtd %xmm15,%xmm14
1332 pxor %xmm9,%xmm15
1333 pshufd $19,%xmm14,%xmm9
1334 pxor %xmm14,%xmm14
1335 movdqa %xmm15,%xmm12
1336 paddq %xmm15,%xmm15
1337 pand %xmm8,%xmm9
1338 pcmpgtd %xmm15,%xmm14
1339 pxor %xmm9,%xmm15
1340 pshufd $19,%xmm14,%xmm9
1341 pxor %xmm14,%xmm14
1342 movdqa %xmm15,%xmm13
1343 paddq %xmm15,%xmm15
1344 pand %xmm8,%xmm9
1345 pcmpgtd %xmm15,%xmm14
1346 pxor %xmm9,%xmm15
1347 subq $96,%rdx
1348 jc .Lxts_enc_short
1350 shrl $1,%eax
1351 subl $1,%eax
1352 movl %eax,%r10d
1353 jmp .Lxts_enc_grandloop
1355 .p2align 4
1356 .Lxts_enc_grandloop:
1357 pshufd $19,%xmm14,%xmm9
1358 movdqa %xmm15,%xmm14
1359 paddq %xmm15,%xmm15
1360 movdqu 0(%rdi),%xmm2
1361 pand %xmm8,%xmm9
1362 movdqu 16(%rdi),%xmm3
1363 pxor %xmm9,%xmm15
1365 movdqu 32(%rdi),%xmm4
1366 pxor %xmm10,%xmm2
1367 movdqu 48(%rdi),%xmm5
1368 pxor %xmm11,%xmm3
1369 movdqu 64(%rdi),%xmm6
1370 pxor %xmm12,%xmm4
1371 movdqu 80(%rdi),%xmm7
1372 leaq 96(%rdi),%rdi
1373 pxor %xmm13,%xmm5
1374 movups (%r11),%xmm0
1375 pxor %xmm14,%xmm6
1376 pxor %xmm15,%xmm7
1380 movups 16(%r11),%xmm1
1381 pxor %xmm0,%xmm2
1382 pxor %xmm0,%xmm3
1383 movdqa %xmm10,0(%rsp)
1384 .byte 102,15,56,220,209
1385 leaq 32(%r11),%rcx
1386 pxor %xmm0,%xmm4
1387 movdqa %xmm11,16(%rsp)
1388 .byte 102,15,56,220,217
1389 pxor %xmm0,%xmm5
1390 movdqa %xmm12,32(%rsp)
1391 .byte 102,15,56,220,225
1392 pxor %xmm0,%xmm6
1393 movdqa %xmm13,48(%rsp)
1394 .byte 102,15,56,220,233
1395 pxor %xmm0,%xmm7
1396 movups (%rcx),%xmm0
1397 decl %eax
1398 movdqa %xmm14,64(%rsp)
1399 .byte 102,15,56,220,241
1400 movdqa %xmm15,80(%rsp)
1401 .byte 102,15,56,220,249
1402 pxor %xmm14,%xmm14
1403 pcmpgtd %xmm15,%xmm14
1404 jmp .Lxts_enc_loop6_enter
1406 .p2align 4
1407 .Lxts_enc_loop6:
1408 .byte 102,15,56,220,209
1409 .byte 102,15,56,220,217
1410 decl %eax
1411 .byte 102,15,56,220,225
1412 .byte 102,15,56,220,233
1413 .byte 102,15,56,220,241
1414 .byte 102,15,56,220,249
1415 .Lxts_enc_loop6_enter:
1416 movups 16(%rcx),%xmm1
1417 .byte 102,15,56,220,208
1418 .byte 102,15,56,220,216
1419 leaq 32(%rcx),%rcx
1420 .byte 102,15,56,220,224
1421 .byte 102,15,56,220,232
1422 .byte 102,15,56,220,240
1423 .byte 102,15,56,220,248
1424 movups (%rcx),%xmm0
1425 jnz .Lxts_enc_loop6
1427 pshufd $19,%xmm14,%xmm9
1428 pxor %xmm14,%xmm14
1429 paddq %xmm15,%xmm15
1430 .byte 102,15,56,220,209
1431 pand %xmm8,%xmm9
1432 .byte 102,15,56,220,217
1433 pcmpgtd %xmm15,%xmm14
1434 .byte 102,15,56,220,225
1435 pxor %xmm9,%xmm15
1436 .byte 102,15,56,220,233
1437 .byte 102,15,56,220,241
1438 .byte 102,15,56,220,249
1439 movups 16(%rcx),%xmm1
1441 pshufd $19,%xmm14,%xmm9
1442 pxor %xmm14,%xmm14
1443 movdqa %xmm15,%xmm10
1444 paddq %xmm15,%xmm15
1445 .byte 102,15,56,220,208
1446 pand %xmm8,%xmm9
1447 .byte 102,15,56,220,216
1448 pcmpgtd %xmm15,%xmm14
1449 .byte 102,15,56,220,224
1450 pxor %xmm9,%xmm15
1451 .byte 102,15,56,220,232
1452 .byte 102,15,56,220,240
1453 .byte 102,15,56,220,248
1454 movups 32(%rcx),%xmm0
1456 pshufd $19,%xmm14,%xmm9
1457 pxor %xmm14,%xmm14
1458 movdqa %xmm15,%xmm11
1459 paddq %xmm15,%xmm15
1460 .byte 102,15,56,220,209
1461 pand %xmm8,%xmm9
1462 .byte 102,15,56,220,217
1463 pcmpgtd %xmm15,%xmm14
1464 .byte 102,15,56,220,225
1465 pxor %xmm9,%xmm15
1466 .byte 102,15,56,220,233
1467 .byte 102,15,56,220,241
1468 .byte 102,15,56,220,249
1470 pshufd $19,%xmm14,%xmm9
1471 pxor %xmm14,%xmm14
1472 movdqa %xmm15,%xmm12
1473 paddq %xmm15,%xmm15
1474 .byte 102,15,56,221,208
1475 pand %xmm8,%xmm9
1476 .byte 102,15,56,221,216
1477 pcmpgtd %xmm15,%xmm14
1478 .byte 102,15,56,221,224
1479 pxor %xmm9,%xmm15
1480 .byte 102,15,56,221,232
1481 .byte 102,15,56,221,240
1482 .byte 102,15,56,221,248
1484 pshufd $19,%xmm14,%xmm9
1485 pxor %xmm14,%xmm14
1486 movdqa %xmm15,%xmm13
1487 paddq %xmm15,%xmm15
1488 xorps 0(%rsp),%xmm2
1489 pand %xmm8,%xmm9
1490 xorps 16(%rsp),%xmm3
1491 pcmpgtd %xmm15,%xmm14
1492 pxor %xmm9,%xmm15
1494 xorps 32(%rsp),%xmm4
1495 movups %xmm2,0(%rsi)
1496 xorps 48(%rsp),%xmm5
1497 movups %xmm3,16(%rsi)
1498 xorps 64(%rsp),%xmm6
1499 movups %xmm4,32(%rsi)
1500 xorps 80(%rsp),%xmm7
1501 movups %xmm5,48(%rsi)
1502 movl %r10d,%eax
1503 movups %xmm6,64(%rsi)
1504 movups %xmm7,80(%rsi)
1505 leaq 96(%rsi),%rsi
1506 subq $96,%rdx
1507 jnc .Lxts_enc_grandloop
1509 leal 3(%rax,%rax,1),%eax
1510 movq %r11,%rcx
1511 movl %eax,%r10d
1513 .Lxts_enc_short:
1514 addq $96,%rdx
1515 jz .Lxts_enc_done
1517 cmpq $32,%rdx
1518 jb .Lxts_enc_one
1519 je .Lxts_enc_two
1521 cmpq $64,%rdx
1522 jb .Lxts_enc_three
1523 je .Lxts_enc_four
1525 pshufd $19,%xmm14,%xmm9
1526 movdqa %xmm15,%xmm14
1527 paddq %xmm15,%xmm15
1528 movdqu (%rdi),%xmm2
1529 pand %xmm8,%xmm9
1530 movdqu 16(%rdi),%xmm3
1531 pxor %xmm9,%xmm15
1533 movdqu 32(%rdi),%xmm4
1534 pxor %xmm10,%xmm2
1535 movdqu 48(%rdi),%xmm5
1536 pxor %xmm11,%xmm3
1537 movdqu 64(%rdi),%xmm6
1538 leaq 80(%rdi),%rdi
1539 pxor %xmm12,%xmm4
1540 pxor %xmm13,%xmm5
1541 pxor %xmm14,%xmm6
1543 call _aesni_encrypt6
1545 xorps %xmm10,%xmm2
1546 movdqa %xmm15,%xmm10
1547 xorps %xmm11,%xmm3
1548 xorps %xmm12,%xmm4
1549 movdqu %xmm2,(%rsi)
1550 xorps %xmm13,%xmm5
1551 movdqu %xmm3,16(%rsi)
1552 xorps %xmm14,%xmm6
1553 movdqu %xmm4,32(%rsi)
1554 movdqu %xmm5,48(%rsi)
1555 movdqu %xmm6,64(%rsi)
1556 leaq 80(%rsi),%rsi
1557 jmp .Lxts_enc_done
1559 .p2align 4
1560 .Lxts_enc_one:
1561 movups (%rdi),%xmm2
1562 leaq 16(%rdi),%rdi
1563 xorps %xmm10,%xmm2
1564 movups (%rcx),%xmm0
1565 movups 16(%rcx),%xmm1
1566 leaq 32(%rcx),%rcx
1567 xorps %xmm0,%xmm2
1568 .Loop_enc1_9:
1569 .byte 102,15,56,220,209
1570 decl %eax
1571 movups (%rcx),%xmm1
1572 leaq 16(%rcx),%rcx
1573 jnz .Loop_enc1_9
1574 .byte 102,15,56,221,209
1575 xorps %xmm10,%xmm2
1576 movdqa %xmm11,%xmm10
1577 movups %xmm2,(%rsi)
1578 leaq 16(%rsi),%rsi
1579 jmp .Lxts_enc_done
1581 .p2align 4
1582 .Lxts_enc_two:
1583 movups (%rdi),%xmm2
1584 movups 16(%rdi),%xmm3
1585 leaq 32(%rdi),%rdi
1586 xorps %xmm10,%xmm2
1587 xorps %xmm11,%xmm3
1589 call _aesni_encrypt3
1591 xorps %xmm10,%xmm2
1592 movdqa %xmm12,%xmm10
1593 xorps %xmm11,%xmm3
1594 movups %xmm2,(%rsi)
1595 movups %xmm3,16(%rsi)
1596 leaq 32(%rsi),%rsi
1597 jmp .Lxts_enc_done
1599 .p2align 4
1600 .Lxts_enc_three:
1601 movups (%rdi),%xmm2
1602 movups 16(%rdi),%xmm3
1603 movups 32(%rdi),%xmm4
1604 leaq 48(%rdi),%rdi
1605 xorps %xmm10,%xmm2
1606 xorps %xmm11,%xmm3
1607 xorps %xmm12,%xmm4
1609 call _aesni_encrypt3
1611 xorps %xmm10,%xmm2
1612 movdqa %xmm13,%xmm10
1613 xorps %xmm11,%xmm3
1614 xorps %xmm12,%xmm4
1615 movups %xmm2,(%rsi)
1616 movups %xmm3,16(%rsi)
1617 movups %xmm4,32(%rsi)
1618 leaq 48(%rsi),%rsi
1619 jmp .Lxts_enc_done
1621 .p2align 4
1622 .Lxts_enc_four:
1623 movups (%rdi),%xmm2
1624 movups 16(%rdi),%xmm3
1625 movups 32(%rdi),%xmm4
1626 xorps %xmm10,%xmm2
1627 movups 48(%rdi),%xmm5
1628 leaq 64(%rdi),%rdi
1629 xorps %xmm11,%xmm3
1630 xorps %xmm12,%xmm4
1631 xorps %xmm13,%xmm5
1633 call _aesni_encrypt4
1635 xorps %xmm10,%xmm2
1636 movdqa %xmm15,%xmm10
1637 xorps %xmm11,%xmm3
1638 xorps %xmm12,%xmm4
1639 movups %xmm2,(%rsi)
1640 xorps %xmm13,%xmm5
1641 movups %xmm3,16(%rsi)
1642 movups %xmm4,32(%rsi)
1643 movups %xmm5,48(%rsi)
1644 leaq 64(%rsi),%rsi
1645 jmp .Lxts_enc_done
1647 .p2align 4
1648 .Lxts_enc_done:
1649 andq $15,%r9
1650 jz .Lxts_enc_ret
1651 movq %r9,%rdx
1653 .Lxts_enc_steal:
1654 movzbl (%rdi),%eax
1655 movzbl -16(%rsi),%ecx
1656 leaq 1(%rdi),%rdi
1657 movb %al,-16(%rsi)
1658 movb %cl,0(%rsi)
1659 leaq 1(%rsi),%rsi
1660 subq $1,%rdx
1661 jnz .Lxts_enc_steal
1663 subq %r9,%rsi
1664 movq %r11,%rcx
1665 movl %r10d,%eax
1667 movups -16(%rsi),%xmm2
1668 xorps %xmm10,%xmm2
1669 movups (%rcx),%xmm0
1670 movups 16(%rcx),%xmm1
1671 leaq 32(%rcx),%rcx
1672 xorps %xmm0,%xmm2
1673 .Loop_enc1_10:
1674 .byte 102,15,56,220,209
1675 decl %eax
1676 movups (%rcx),%xmm1
1677 leaq 16(%rcx),%rcx
1678 jnz .Loop_enc1_10
1679 .byte 102,15,56,221,209
1680 xorps %xmm10,%xmm2
1681 movups %xmm2,-16(%rsi)
1683 .Lxts_enc_ret:
1684 movaps 96(%rsp),%xmm6
1685 movaps 112(%rsp),%xmm7
1686 movaps 128(%rsp),%xmm8
1687 movaps 144(%rsp),%xmm9
1688 movaps 160(%rsp),%xmm10
1689 movaps 176(%rsp),%xmm11
1690 movaps 192(%rsp),%xmm12
1691 movaps 208(%rsp),%xmm13
1692 movaps 224(%rsp),%xmm14
1693 movaps 240(%rsp),%xmm15
1694 leaq 264(%rsp),%rsp
1695 .Lxts_enc_epilogue:
1696 movq 8(%rsp),%rdi
1697 movq 16(%rsp),%rsi
1698 .byte 0xf3,0xc3
1699 .LSEH_end_aesni_xts_encrypt:
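#
# XTS decryption mirrors the encrypt path; the setnz/shlq adjustment up
# front holds back one full block whenever the length is not a multiple
# of 16, so it can be processed together with the stolen tail.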
1700 .globl aesni_xts_decrypt
1701 .def aesni_xts_decrypt; .scl 2; .type 32; .endef
1702 .p2align 4
1703 aesni_xts_decrypt:
1704 movq %rdi,8(%rsp)
1705 movq %rsi,16(%rsp)
1706 movq %rsp,%rax
1707 .LSEH_begin_aesni_xts_decrypt:
1708 movq %rcx,%rdi
1709 movq %rdx,%rsi
1710 movq %r8,%rdx
1711 movq %r9,%rcx
1712 movq 40(%rsp),%r8
1713 movq 48(%rsp),%r9
1715 leaq -264(%rsp),%rsp
1716 movaps %xmm6,96(%rsp)
1717 movaps %xmm7,112(%rsp)
1718 movaps %xmm8,128(%rsp)
1719 movaps %xmm9,144(%rsp)
1720 movaps %xmm10,160(%rsp)
1721 movaps %xmm11,176(%rsp)
1722 movaps %xmm12,192(%rsp)
1723 movaps %xmm13,208(%rsp)
1724 movaps %xmm14,224(%rsp)
1725 movaps %xmm15,240(%rsp)
1726 .Lxts_dec_body:
1727 movups (%r9),%xmm15
1728 movl 240(%r8),%eax
1729 movl 240(%rcx),%r10d
1730 movups (%r8),%xmm0
1731 movups 16(%r8),%xmm1
1732 leaq 32(%r8),%r8
1733 xorps %xmm0,%xmm15
1734 .Loop_enc1_11:
1735 .byte 102,68,15,56,220,249
1736 decl %eax
1737 movups (%r8),%xmm1
1738 leaq 16(%r8),%r8
1739 jnz .Loop_enc1_11
1740 .byte 102,68,15,56,221,249
1741 xorl %eax,%eax
1742 testq $15,%rdx
1743 setnz %al
1744 shlq $4,%rax
1745 subq %rax,%rdx
1747 movq %rcx,%r11
1748 movl %r10d,%eax
1749 movq %rdx,%r9
1750 andq $-16,%rdx
1752 movdqa .Lxts_magic(%rip),%xmm8
1753 pxor %xmm14,%xmm14
1754 pcmpgtd %xmm15,%xmm14
1755 pshufd $19,%xmm14,%xmm9
1756 pxor %xmm14,%xmm14
1757 movdqa %xmm15,%xmm10
1758 paddq %xmm15,%xmm15
1759 pand %xmm8,%xmm9
1760 pcmpgtd %xmm15,%xmm14
1761 pxor %xmm9,%xmm15
1762 pshufd $19,%xmm14,%xmm9
1763 pxor %xmm14,%xmm14
1764 movdqa %xmm15,%xmm11
1765 paddq %xmm15,%xmm15
1766 pand %xmm8,%xmm9
1767 pcmpgtd %xmm15,%xmm14
1768 pxor %xmm9,%xmm15
1769 pshufd $19,%xmm14,%xmm9
1770 pxor %xmm14,%xmm14
1771 movdqa %xmm15,%xmm12
1772 paddq %xmm15,%xmm15
1773 pand %xmm8,%xmm9
1774 pcmpgtd %xmm15,%xmm14
1775 pxor %xmm9,%xmm15
1776 pshufd $19,%xmm14,%xmm9
1777 pxor %xmm14,%xmm14
1778 movdqa %xmm15,%xmm13
1779 paddq %xmm15,%xmm15
1780 pand %xmm8,%xmm9
1781 pcmpgtd %xmm15,%xmm14
1782 pxor %xmm9,%xmm15
1783 subq $96,%rdx
1784 jc .Lxts_dec_short
1786 shrl $1,%eax
1787 subl $1,%eax
1788 movl %eax,%r10d
1789 jmp .Lxts_dec_grandloop
1791 .p2align 4
1792 .Lxts_dec_grandloop:
1793 pshufd $19,%xmm14,%xmm9
1794 movdqa %xmm15,%xmm14
1795 paddq %xmm15,%xmm15
1796 movdqu 0(%rdi),%xmm2
1797 pand %xmm8,%xmm9
1798 movdqu 16(%rdi),%xmm3
1799 pxor %xmm9,%xmm15
1801 movdqu 32(%rdi),%xmm4
1802 pxor %xmm10,%xmm2
1803 movdqu 48(%rdi),%xmm5
1804 pxor %xmm11,%xmm3
1805 movdqu 64(%rdi),%xmm6
1806 pxor %xmm12,%xmm4
1807 movdqu 80(%rdi),%xmm7
1808 leaq 96(%rdi),%rdi
1809 pxor %xmm13,%xmm5
1810 movups (%r11),%xmm0
1811 pxor %xmm14,%xmm6
1812 pxor %xmm15,%xmm7
1816 movups 16(%r11),%xmm1
1817 pxor %xmm0,%xmm2
1818 pxor %xmm0,%xmm3
1819 movdqa %xmm10,0(%rsp)
1820 .byte 102,15,56,222,209
1821 leaq 32(%r11),%rcx
1822 pxor %xmm0,%xmm4
1823 movdqa %xmm11,16(%rsp)
1824 .byte 102,15,56,222,217
1825 pxor %xmm0,%xmm5
1826 movdqa %xmm12,32(%rsp)
1827 .byte 102,15,56,222,225
1828 pxor %xmm0,%xmm6
1829 movdqa %xmm13,48(%rsp)
1830 .byte 102,15,56,222,233
1831 pxor %xmm0,%xmm7
1832 movups (%rcx),%xmm0
1833 decl %eax
1834 movdqa %xmm14,64(%rsp)
1835 .byte 102,15,56,222,241
1836 movdqa %xmm15,80(%rsp)
1837 .byte 102,15,56,222,249
1838 pxor %xmm14,%xmm14
1839 pcmpgtd %xmm15,%xmm14
1840 jmp .Lxts_dec_loop6_enter
1842 .p2align 4
1843 .Lxts_dec_loop6:
1844 .byte 102,15,56,222,209
1845 .byte 102,15,56,222,217
1846 decl %eax
1847 .byte 102,15,56,222,225
1848 .byte 102,15,56,222,233
1849 .byte 102,15,56,222,241
1850 .byte 102,15,56,222,249
1851 .Lxts_dec_loop6_enter:
1852 movups 16(%rcx),%xmm1
1853 .byte 102,15,56,222,208
1854 .byte 102,15,56,222,216
1855 leaq 32(%rcx),%rcx
1856 .byte 102,15,56,222,224
1857 .byte 102,15,56,222,232
1858 .byte 102,15,56,222,240
1859 .byte 102,15,56,222,248
1860 movups (%rcx),%xmm0
1861 jnz .Lxts_dec_loop6
1863 pshufd $19,%xmm14,%xmm9
1864 pxor %xmm14,%xmm14
1865 paddq %xmm15,%xmm15
1866 .byte 102,15,56,222,209
1867 pand %xmm8,%xmm9
1868 .byte 102,15,56,222,217
1869 pcmpgtd %xmm15,%xmm14
1870 .byte 102,15,56,222,225
1871 pxor %xmm9,%xmm15
1872 .byte 102,15,56,222,233
1873 .byte 102,15,56,222,241
1874 .byte 102,15,56,222,249
1875 movups 16(%rcx),%xmm1
1877 pshufd $19,%xmm14,%xmm9
1878 pxor %xmm14,%xmm14
1879 movdqa %xmm15,%xmm10
1880 paddq %xmm15,%xmm15
1881 .byte 102,15,56,222,208
1882 pand %xmm8,%xmm9
1883 .byte 102,15,56,222,216
1884 pcmpgtd %xmm15,%xmm14
1885 .byte 102,15,56,222,224
1886 pxor %xmm9,%xmm15
1887 .byte 102,15,56,222,232
1888 .byte 102,15,56,222,240
1889 .byte 102,15,56,222,248
1890 movups 32(%rcx),%xmm0
1892 pshufd $19,%xmm14,%xmm9
1893 pxor %xmm14,%xmm14
1894 movdqa %xmm15,%xmm11
1895 paddq %xmm15,%xmm15
1896 .byte 102,15,56,222,209
1897 pand %xmm8,%xmm9
1898 .byte 102,15,56,222,217
1899 pcmpgtd %xmm15,%xmm14
1900 .byte 102,15,56,222,225
1901 pxor %xmm9,%xmm15
1902 .byte 102,15,56,222,233
1903 .byte 102,15,56,222,241
1904 .byte 102,15,56,222,249
1906 pshufd $19,%xmm14,%xmm9
1907 pxor %xmm14,%xmm14
1908 movdqa %xmm15,%xmm12
1909 paddq %xmm15,%xmm15
1910 .byte 102,15,56,223,208
1911 pand %xmm8,%xmm9
1912 .byte 102,15,56,223,216
1913 pcmpgtd %xmm15,%xmm14
1914 .byte 102,15,56,223,224
1915 pxor %xmm9,%xmm15
1916 .byte 102,15,56,223,232
1917 .byte 102,15,56,223,240
1918 .byte 102,15,56,223,248
1920 pshufd $19,%xmm14,%xmm9
1921 pxor %xmm14,%xmm14
1922 movdqa %xmm15,%xmm13
1923 paddq %xmm15,%xmm15
1924 xorps 0(%rsp),%xmm2
1925 pand %xmm8,%xmm9
1926 xorps 16(%rsp),%xmm3
1927 pcmpgtd %xmm15,%xmm14
1928 pxor %xmm9,%xmm15
1930 xorps 32(%rsp),%xmm4
1931 movups %xmm2,0(%rsi)
1932 xorps 48(%rsp),%xmm5
1933 movups %xmm3,16(%rsi)
1934 xorps 64(%rsp),%xmm6
1935 movups %xmm4,32(%rsi)
1936 xorps 80(%rsp),%xmm7
1937 movups %xmm5,48(%rsi)
1938 movl %r10d,%eax
1939 movups %xmm6,64(%rsi)
1940 movups %xmm7,80(%rsi)
1941 leaq 96(%rsi),%rsi
1942 subq $96,%rdx
1943 jnc .Lxts_dec_grandloop
1945 leal 3(%rax,%rax,1),%eax
1946 movq %r11,%rcx
1947 movl %eax,%r10d
1949 .Lxts_dec_short:
1950 addq $96,%rdx
1951 jz .Lxts_dec_done
1953 cmpq $32,%rdx
1954 jb .Lxts_dec_one
1955 je .Lxts_dec_two
1957 cmpq $64,%rdx
1958 jb .Lxts_dec_three
1959 je .Lxts_dec_four
1961 pshufd $19,%xmm14,%xmm9
1962 movdqa %xmm15,%xmm14
1963 paddq %xmm15,%xmm15
1964 movdqu (%rdi),%xmm2
1965 pand %xmm8,%xmm9
1966 movdqu 16(%rdi),%xmm3
1967 pxor %xmm9,%xmm15
1969 movdqu 32(%rdi),%xmm4
1970 pxor %xmm10,%xmm2
1971 movdqu 48(%rdi),%xmm5
1972 pxor %xmm11,%xmm3
1973 movdqu 64(%rdi),%xmm6
1974 leaq 80(%rdi),%rdi
1975 pxor %xmm12,%xmm4
1976 pxor %xmm13,%xmm5
1977 pxor %xmm14,%xmm6
1979 call _aesni_decrypt6
1981 xorps %xmm10,%xmm2
1982 xorps %xmm11,%xmm3
1983 xorps %xmm12,%xmm4
1984 movdqu %xmm2,(%rsi)
1985 xorps %xmm13,%xmm5
1986 movdqu %xmm3,16(%rsi)
1987 xorps %xmm14,%xmm6
1988 movdqu %xmm4,32(%rsi)
1989 pxor %xmm14,%xmm14
1990 movdqu %xmm5,48(%rsi)
1991 pcmpgtd %xmm15,%xmm14
1992 movdqu %xmm6,64(%rsi)
1993 leaq 80(%rsi),%rsi
1994 pshufd $19,%xmm14,%xmm11
1995 andq $15,%r9
1996 jz .Lxts_dec_ret
1998 movdqa %xmm15,%xmm10
1999 paddq %xmm15,%xmm15
2000 pand %xmm8,%xmm11
2001 pxor %xmm15,%xmm11
2002 jmp .Lxts_dec_done2
2004 .p2align 4
2005 .Lxts_dec_one:
2006 movups (%rdi),%xmm2
2007 leaq 16(%rdi),%rdi
2008 xorps %xmm10,%xmm2
2009 movups (%rcx),%xmm0
2010 movups 16(%rcx),%xmm1
2011 leaq 32(%rcx),%rcx
2012 xorps %xmm0,%xmm2
2013 .Loop_dec1_12:
2014 .byte 102,15,56,222,209
2015 decl %eax
2016 movups (%rcx),%xmm1
2017 leaq 16(%rcx),%rcx
2018 jnz .Loop_dec1_12
2019 .byte 102,15,56,223,209
2020 xorps %xmm10,%xmm2
2021 movdqa %xmm11,%xmm10
2022 movups %xmm2,(%rsi)
2023 movdqa %xmm12,%xmm11
2024 leaq 16(%rsi),%rsi
2025 jmp .Lxts_dec_done
2027 .p2align 4
2028 .Lxts_dec_two:
2029 movups (%rdi),%xmm2
2030 movups 16(%rdi),%xmm3
2031 leaq 32(%rdi),%rdi
2032 xorps %xmm10,%xmm2
2033 xorps %xmm11,%xmm3
2035 call _aesni_decrypt3
2037 xorps %xmm10,%xmm2
2038 movdqa %xmm12,%xmm10
2039 xorps %xmm11,%xmm3
2040 movdqa %xmm13,%xmm11
2041 movups %xmm2,(%rsi)
2042 movups %xmm3,16(%rsi)
2043 leaq 32(%rsi),%rsi
2044 jmp .Lxts_dec_done
2046 .p2align 4
2047 .Lxts_dec_three:
2048 movups (%rdi),%xmm2
2049 movups 16(%rdi),%xmm3
2050 movups 32(%rdi),%xmm4
2051 leaq 48(%rdi),%rdi
2052 xorps %xmm10,%xmm2
2053 xorps %xmm11,%xmm3
2054 xorps %xmm12,%xmm4
2056 call _aesni_decrypt3
2058 xorps %xmm10,%xmm2
2059 movdqa %xmm13,%xmm10
2060 xorps %xmm11,%xmm3
2061 movdqa %xmm15,%xmm11
2062 xorps %xmm12,%xmm4
2063 movups %xmm2,(%rsi)
2064 movups %xmm3,16(%rsi)
2065 movups %xmm4,32(%rsi)
2066 leaq 48(%rsi),%rsi
2067 jmp .Lxts_dec_done
2069 .p2align 4
2070 .Lxts_dec_four:
2071 pshufd $19,%xmm14,%xmm9
2072 movdqa %xmm15,%xmm14
2073 paddq %xmm15,%xmm15
2074 movups (%rdi),%xmm2
2075 pand %xmm8,%xmm9
2076 movups 16(%rdi),%xmm3
2077 pxor %xmm9,%xmm15
2079 movups 32(%rdi),%xmm4
2080 xorps %xmm10,%xmm2
2081 movups 48(%rdi),%xmm5
2082 leaq 64(%rdi),%rdi
2083 xorps %xmm11,%xmm3
2084 xorps %xmm12,%xmm4
2085 xorps %xmm13,%xmm5
2087 call _aesni_decrypt4
2089 xorps %xmm10,%xmm2
2090 movdqa %xmm14,%xmm10
2091 xorps %xmm11,%xmm3
2092 movdqa %xmm15,%xmm11
2093 xorps %xmm12,%xmm4
2094 movups %xmm2,(%rsi)
2095 xorps %xmm13,%xmm5
2096 movups %xmm3,16(%rsi)
2097 movups %xmm4,32(%rsi)
2098 movups %xmm5,48(%rsi)
2099 leaq 64(%rsi),%rsi
2100 jmp .Lxts_dec_done
2102 .p2align 4
2103 .Lxts_dec_done:
2104 andq $15,%r9
2105 jz .Lxts_dec_ret
2106 .Lxts_dec_done2:
2107 movq %r9,%rdx
2108 movq %r11,%rcx
2109 movl %r10d,%eax
2111 movups (%rdi),%xmm2
2112 xorps %xmm11,%xmm2
2113 movups (%rcx),%xmm0
2114 movups 16(%rcx),%xmm1
2115 leaq 32(%rcx),%rcx
2116 xorps %xmm0,%xmm2
2117 .Loop_dec1_13:
2118 .byte 102,15,56,222,209
2119 decl %eax
2120 movups (%rcx),%xmm1
2121 leaq 16(%rcx),%rcx
2122 jnz .Loop_dec1_13
2123 .byte 102,15,56,223,209
2124 xorps %xmm11,%xmm2
2125 movups %xmm2,(%rsi)
2127 .Lxts_dec_steal:
2128 movzbl 16(%rdi),%eax
2129 movzbl (%rsi),%ecx
2130 leaq 1(%rdi),%rdi
2131 movb %al,(%rsi)
2132 movb %cl,16(%rsi)
2133 leaq 1(%rsi),%rsi
2134 subq $1,%rdx
2135 jnz .Lxts_dec_steal
2137 subq %r9,%rsi
2138 movq %r11,%rcx
2139 movl %r10d,%eax
2141 movups (%rsi),%xmm2
2142 xorps %xmm10,%xmm2
2143 movups (%rcx),%xmm0
2144 movups 16(%rcx),%xmm1
2145 leaq 32(%rcx),%rcx
2146 xorps %xmm0,%xmm2
2147 .Loop_dec1_14:
2148 .byte 102,15,56,222,209
2149 decl %eax
2150 movups (%rcx),%xmm1
2151 leaq 16(%rcx),%rcx
2152 jnz .Loop_dec1_14
2153 .byte 102,15,56,223,209
2154 xorps %xmm10,%xmm2
2155 movups %xmm2,(%rsi)
2157 .Lxts_dec_ret:
2158 movaps 96(%rsp),%xmm6
2159 movaps 112(%rsp),%xmm7
2160 movaps 128(%rsp),%xmm8
2161 movaps 144(%rsp),%xmm9
2162 movaps 160(%rsp),%xmm10
2163 movaps 176(%rsp),%xmm11
2164 movaps 192(%rsp),%xmm12
2165 movaps 208(%rsp),%xmm13
2166 movaps 224(%rsp),%xmm14
2167 movaps 240(%rsp),%xmm15
2168 leaq 264(%rsp),%rsp
2169 .Lxts_dec_epilogue:
2170 movq 8(%rsp),%rdi
2171 movq 16(%rsp),%rsi
2172 .byte 0xf3,0xc3
2173 .LSEH_end_aesni_xts_decrypt:
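#
# CBC: encryption is inherently serial (one block per .Lcbc_enc_loop
# iteration), while decryption pipelines eight blocks at a time.  The
# ".long 0x9066A4F3" and ".long 0x9066AAF3" words are hand-encoded
# "rep movsb" and "rep stosb" (each padded with a two-byte nop), used to
# copy and zero-pad the partial tail block.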
2174 .globl aesni_cbc_encrypt
2175 .def aesni_cbc_encrypt; .scl 2; .type 32; .endef
2176 .p2align 4
2177 aesni_cbc_encrypt:
2178 movq %rdi,8(%rsp)
2179 movq %rsi,16(%rsp)
2180 movq %rsp,%rax
2181 .LSEH_begin_aesni_cbc_encrypt:
2182 movq %rcx,%rdi
2183 movq %rdx,%rsi
2184 movq %r8,%rdx
2185 movq %r9,%rcx
2186 movq 40(%rsp),%r8
2187 movq 48(%rsp),%r9
2189 testq %rdx,%rdx
2190 jz .Lcbc_ret
2192 movl 240(%rcx),%r10d
2193 movq %rcx,%r11
2194 testl %r9d,%r9d
2195 jz .Lcbc_decrypt
2197 movups (%r8),%xmm2
2198 movl %r10d,%eax
2199 cmpq $16,%rdx
2200 jb .Lcbc_enc_tail
2201 subq $16,%rdx
2202 jmp .Lcbc_enc_loop
2203 .p2align 4
2204 .Lcbc_enc_loop:
2205 movups (%rdi),%xmm3
2206 leaq 16(%rdi),%rdi
2208 movups (%rcx),%xmm0
2209 movups 16(%rcx),%xmm1
2210 xorps %xmm0,%xmm3
2211 leaq 32(%rcx),%rcx
2212 xorps %xmm3,%xmm2
2213 .Loop_enc1_15:
2214 .byte 102,15,56,220,209
2215 decl %eax
2216 movups (%rcx),%xmm1
2217 leaq 16(%rcx),%rcx
2218 jnz .Loop_enc1_15
2219 .byte 102,15,56,221,209
2220 movl %r10d,%eax
2221 movq %r11,%rcx
2222 movups %xmm2,0(%rsi)
2223 leaq 16(%rsi),%rsi
2224 subq $16,%rdx
2225 jnc .Lcbc_enc_loop
2226 addq $16,%rdx
2227 jnz .Lcbc_enc_tail
2228 movups %xmm2,(%r8)
2229 jmp .Lcbc_ret
2231 .Lcbc_enc_tail:
2232 movq %rdx,%rcx
2233 xchgq %rdi,%rsi
2234 .long 0x9066A4F3
2235 movl $16,%ecx
2236 subq %rdx,%rcx
2237 xorl %eax,%eax
2238 .long 0x9066AAF3
2239 leaq -16(%rdi),%rdi
2240 movl %r10d,%eax
2241 movq %rdi,%rsi
2242 movq %r11,%rcx
2243 xorq %rdx,%rdx
2244 jmp .Lcbc_enc_loop
2246 .p2align 4
2247 .Lcbc_decrypt:
2248 leaq -88(%rsp),%rsp
2249 movaps %xmm6,(%rsp)
2250 movaps %xmm7,16(%rsp)
2251 movaps %xmm8,32(%rsp)
2252 movaps %xmm9,48(%rsp)
2253 .Lcbc_decrypt_body:
2254 movups (%r8),%xmm9
2255 movl %r10d,%eax
2256 cmpq $112,%rdx
2257 jbe .Lcbc_dec_tail
2258 shrl $1,%r10d
2259 subq $112,%rdx
2260 movl %r10d,%eax
2261 movaps %xmm9,64(%rsp)
2262 jmp .Lcbc_dec_loop8_enter
2263 .p2align 4
2264 .Lcbc_dec_loop8:
2265 movaps %xmm0,64(%rsp)
2266 movups %xmm9,(%rsi)
2267 leaq 16(%rsi),%rsi
2268 .Lcbc_dec_loop8_enter:
2269 movups (%rcx),%xmm0
2270 movups (%rdi),%xmm2
2271 movups 16(%rdi),%xmm3
2272 movups 16(%rcx),%xmm1
2274 leaq 32(%rcx),%rcx
2275 movdqu 32(%rdi),%xmm4
2276 xorps %xmm0,%xmm2
2277 movdqu 48(%rdi),%xmm5
2278 xorps %xmm0,%xmm3
2279 movdqu 64(%rdi),%xmm6
2280 .byte 102,15,56,222,209
2281 pxor %xmm0,%xmm4
2282 movdqu 80(%rdi),%xmm7
2283 .byte 102,15,56,222,217
2284 pxor %xmm0,%xmm5
2285 movdqu 96(%rdi),%xmm8
2286 .byte 102,15,56,222,225
2287 pxor %xmm0,%xmm6
2288 movdqu 112(%rdi),%xmm9
2289 .byte 102,15,56,222,233
2290 pxor %xmm0,%xmm7
2291 decl %eax
2292 .byte 102,15,56,222,241
2293 pxor %xmm0,%xmm8
2294 .byte 102,15,56,222,249
2295 pxor %xmm0,%xmm9
2296 movups (%rcx),%xmm0
2297 .byte 102,68,15,56,222,193
2298 .byte 102,68,15,56,222,201
2299 movups 16(%rcx),%xmm1
2301 call .Ldec_loop8_enter
2303 movups (%rdi),%xmm1
2304 movups 16(%rdi),%xmm0
2305 xorps 64(%rsp),%xmm2
2306 xorps %xmm1,%xmm3
2307 movups 32(%rdi),%xmm1
2308 xorps %xmm0,%xmm4
2309 movups 48(%rdi),%xmm0
2310 xorps %xmm1,%xmm5
2311 movups 64(%rdi),%xmm1
2312 xorps %xmm0,%xmm6
2313 movups 80(%rdi),%xmm0
2314 xorps %xmm1,%xmm7
2315 movups 96(%rdi),%xmm1
2316 xorps %xmm0,%xmm8
2317 movups 112(%rdi),%xmm0
2318 xorps %xmm1,%xmm9
2319 movups %xmm2,(%rsi)
2320 movups %xmm3,16(%rsi)
2321 movups %xmm4,32(%rsi)
2322 movups %xmm5,48(%rsi)
2323 movl %r10d,%eax
2324 movups %xmm6,64(%rsi)
2325 movq %r11,%rcx
2326 movups %xmm7,80(%rsi)
2327 leaq 128(%rdi),%rdi
2328 movups %xmm8,96(%rsi)
2329 leaq 112(%rsi),%rsi
2330 subq $128,%rdx
2331 ja .Lcbc_dec_loop8
2333 movaps %xmm9,%xmm2
2334 movaps %xmm0,%xmm9
2335 addq $112,%rdx
2336 jle .Lcbc_dec_tail_collected
2337 movups %xmm2,(%rsi)
2338 leal 1(%r10,%r10,1),%eax
2339 leaq 16(%rsi),%rsi
2340 .Lcbc_dec_tail:
2341 movups (%rdi),%xmm2
2342 movaps %xmm2,%xmm8
2343 cmpq $16,%rdx
2344 jbe .Lcbc_dec_one
2346 movups 16(%rdi),%xmm3
2347 movaps %xmm3,%xmm7
2348 cmpq $32,%rdx
2349 jbe .Lcbc_dec_two
2351 movups 32(%rdi),%xmm4
2352 movaps %xmm4,%xmm6
2353 cmpq $48,%rdx
2354 jbe .Lcbc_dec_three
2356 movups 48(%rdi),%xmm5
2357 cmpq $64,%rdx
2358 jbe .Lcbc_dec_four
2360 movups 64(%rdi),%xmm6
2361 cmpq $80,%rdx
2362 jbe .Lcbc_dec_five
2364 movups 80(%rdi),%xmm7
2365 cmpq $96,%rdx
2366 jbe .Lcbc_dec_six
2368 movups 96(%rdi),%xmm8
2369 movaps %xmm9,64(%rsp)
2370 call _aesni_decrypt8
2371 movups (%rdi),%xmm1
2372 movups 16(%rdi),%xmm0
2373 xorps 64(%rsp),%xmm2
2374 xorps %xmm1,%xmm3
2375 movups 32(%rdi),%xmm1
2376 xorps %xmm0,%xmm4
2377 movups 48(%rdi),%xmm0
2378 xorps %xmm1,%xmm5
2379 movups 64(%rdi),%xmm1
2380 xorps %xmm0,%xmm6
2381 movups 80(%rdi),%xmm0
2382 xorps %xmm1,%xmm7
2383 movups 96(%rdi),%xmm9
2384 xorps %xmm0,%xmm8
2385 movups %xmm2,(%rsi)
2386 movups %xmm3,16(%rsi)
2387 movups %xmm4,32(%rsi)
2388 movups %xmm5,48(%rsi)
2389 movups %xmm6,64(%rsi)
2390 movups %xmm7,80(%rsi)
2391 leaq 96(%rsi),%rsi
2392 movaps %xmm8,%xmm2
2393 subq $112,%rdx
2394 jmp .Lcbc_dec_tail_collected
.p2align 4
.Lcbc_dec_one:
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_16:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_16
.byte 102,15,56,223,209
xorps %xmm9,%xmm2
movaps %xmm8,%xmm9
subq $16,%rdx
jmp .Lcbc_dec_tail_collected
.p2align 4
.Lcbc_dec_two:
xorps %xmm4,%xmm4
call _aesni_decrypt3
xorps %xmm9,%xmm2
xorps %xmm8,%xmm3
movups %xmm2,(%rsi)
movaps %xmm7,%xmm9
movaps %xmm3,%xmm2
leaq 16(%rsi),%rsi
subq $32,%rdx
jmp .Lcbc_dec_tail_collected
.p2align 4
.Lcbc_dec_three:
call _aesni_decrypt3
xorps %xmm9,%xmm2
xorps %xmm8,%xmm3
movups %xmm2,(%rsi)
xorps %xmm7,%xmm4
movups %xmm3,16(%rsi)
movaps %xmm6,%xmm9
movaps %xmm4,%xmm2
leaq 32(%rsi),%rsi
subq $48,%rdx
jmp .Lcbc_dec_tail_collected
.p2align 4
.Lcbc_dec_four:
call _aesni_decrypt4
xorps %xmm9,%xmm2
movups 48(%rdi),%xmm9
xorps %xmm8,%xmm3
movups %xmm2,(%rsi)
xorps %xmm7,%xmm4
movups %xmm3,16(%rsi)
xorps %xmm6,%xmm5
movups %xmm4,32(%rsi)
movaps %xmm5,%xmm2
leaq 48(%rsi),%rsi
subq $64,%rdx
jmp .Lcbc_dec_tail_collected
.p2align 4
.Lcbc_dec_five:
xorps %xmm7,%xmm7
call _aesni_decrypt6
movups 16(%rdi),%xmm1
movups 32(%rdi),%xmm0
xorps %xmm9,%xmm2
xorps %xmm8,%xmm3
xorps %xmm1,%xmm4
movups 48(%rdi),%xmm1
xorps %xmm0,%xmm5
movups 64(%rdi),%xmm9
xorps %xmm1,%xmm6
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
movaps %xmm6,%xmm2
subq $80,%rdx
jmp .Lcbc_dec_tail_collected
.p2align 4
.Lcbc_dec_six:
call _aesni_decrypt6
movups 16(%rdi),%xmm1
movups 32(%rdi),%xmm0
xorps %xmm9,%xmm2
xorps %xmm8,%xmm3
xorps %xmm1,%xmm4
movups 48(%rdi),%xmm1
xorps %xmm0,%xmm5
movups 64(%rdi),%xmm0
xorps %xmm1,%xmm6
movups 80(%rdi),%xmm9
xorps %xmm0,%xmm7
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
movaps %xmm7,%xmm2
subq $96,%rdx
jmp .Lcbc_dec_tail_collected
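# Common tail: write the updated IV (%xmm9) back through the caller's IV
# pointer in %r8, then either store the last whole plaintext block from %xmm2
# or fall into the partial-block copy below.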
.p2align 4
.Lcbc_dec_tail_collected:
andq $15,%rdx
movups %xmm9,(%r8)
jnz .Lcbc_dec_tail_partial
movups %xmm2,(%rsi)
jmp .Lcbc_dec_ret
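# Partial final block: park %xmm2 in the stack scratch slot, then a
# `rep movsb' (encoded as the .long below, followed by a padding nop) copies
# from the scratch slot to the output buffer.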
.p2align 4
.Lcbc_dec_tail_partial:
movaps %xmm2,64(%rsp)
movq $16,%rcx
movq %rsi,%rdi
subq %rdx,%rcx
leaq 64(%rsp),%rsi
.long 0x9066A4F3

.Lcbc_dec_ret:
movaps (%rsp),%xmm6
movaps 16(%rsp),%xmm7
movaps 32(%rsp),%xmm8
movaps 48(%rsp),%xmm9
leaq 88(%rsp),%rsp
.Lcbc_ret:
movq 8(%rsp),%rdi
movq 16(%rsp),%rsi
.byte 0xf3,0xc3
.LSEH_end_aesni_cbc_encrypt:
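# aesni_set_decrypt_key(userKey=%rcx, bits=%edx, key=%r8): builds the
# encryption schedule via __aesni_set_encrypt_key, then converts it for the
# equivalent inverse cipher by reversing the order of the round keys and
# running the inner ones through AESIMC (the 102,15,56,219 byte sequences).
# Any error code from the key-expansion call is returned unchanged in %eax.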
.globl aesni_set_decrypt_key
.def aesni_set_decrypt_key; .scl 2; .type 32; .endef
.p2align 4
aesni_set_decrypt_key:
.byte 0x48,0x83,0xEC,0x08
call __aesni_set_encrypt_key
shll $4,%edx
testl %eax,%eax
jnz .Ldec_key_ret
leaq 16(%r8,%rdx,1),%rcx

movups (%r8),%xmm0
movups (%rcx),%xmm1
movups %xmm0,(%rcx)
movups %xmm1,(%r8)
leaq 16(%r8),%r8
leaq -16(%rcx),%rcx

.Ldec_key_inverse:
movups (%r8),%xmm0
movups (%rcx),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
leaq 16(%r8),%r8
leaq -16(%rcx),%rcx
movups %xmm0,16(%rcx)
movups %xmm1,-16(%r8)
cmpq %r8,%rcx
ja .Ldec_key_inverse

movups (%r8),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%rcx)
.Ldec_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3
.LSEH_end_set_decrypt_key:
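# aesni_set_encrypt_key(userKey=%rcx, bits=%edx, key=%r8): expands the user
# key into the round-key schedule using AESKEYGENASSIST (the 102,15,58,223
# byte sequences) plus the .Lkey_expansion_* helpers further down.  Returns 0
# on success, -1 if either pointer is NULL, -2 for a key size other than
# 128/192/256 bits; the round count (9, 11 or 13) is stored at offset 240 of
# the schedule.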
.globl aesni_set_encrypt_key
.def aesni_set_encrypt_key; .scl 2; .type 32; .endef
.p2align 4
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte 0x48,0x83,0xEC,0x08
movq $-1,%rax
testq %rcx,%rcx
jz .Lenc_key_ret
testq %r8,%r8
jz .Lenc_key_ret

movups (%rcx),%xmm0
xorps %xmm4,%xmm4
leaq 16(%r8),%rax
cmpl $256,%edx
je .L14rounds
cmpl $192,%edx
je .L12rounds
cmpl $128,%edx
jne .Lbad_keybits

.L10rounds:
movl $9,%edx
movups %xmm0,(%r8)
.byte 102,15,58,223,200,1
call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2
call .Lkey_expansion_128
.byte 102,15,58,223,200,4
call .Lkey_expansion_128
.byte 102,15,58,223,200,8
call .Lkey_expansion_128
.byte 102,15,58,223,200,16
call .Lkey_expansion_128
.byte 102,15,58,223,200,32
call .Lkey_expansion_128
.byte 102,15,58,223,200,64
call .Lkey_expansion_128
.byte 102,15,58,223,200,128
call .Lkey_expansion_128
.byte 102,15,58,223,200,27
call .Lkey_expansion_128
.byte 102,15,58,223,200,54
call .Lkey_expansion_128
movups %xmm0,(%rax)
movl %edx,80(%rax)
xorl %eax,%eax
jmp .Lenc_key_ret

.p2align 4
.L12rounds:
movq 16(%rcx),%xmm2
movl $11,%edx
movups %xmm0,(%r8)
.byte 102,15,58,223,202,1
call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2
call .Lkey_expansion_192b
.byte 102,15,58,223,202,4
call .Lkey_expansion_192a
.byte 102,15,58,223,202,8
call .Lkey_expansion_192b
.byte 102,15,58,223,202,16
call .Lkey_expansion_192a
.byte 102,15,58,223,202,32
call .Lkey_expansion_192b
.byte 102,15,58,223,202,64
call .Lkey_expansion_192a
.byte 102,15,58,223,202,128
call .Lkey_expansion_192b
movups %xmm0,(%rax)
movl %edx,48(%rax)
xorq %rax,%rax
jmp .Lenc_key_ret

.p2align 4
.L14rounds:
movups 16(%rcx),%xmm2
movl $13,%edx
leaq 16(%rax),%rax
movups %xmm0,(%r8)
movups %xmm2,16(%r8)
.byte 102,15,58,223,202,1
call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,2
call .Lkey_expansion_256a
.byte 102,15,58,223,200,2
call .Lkey_expansion_256b
.byte 102,15,58,223,202,4
call .Lkey_expansion_256a
.byte 102,15,58,223,200,4
call .Lkey_expansion_256b
.byte 102,15,58,223,202,8
call .Lkey_expansion_256a
.byte 102,15,58,223,200,8
call .Lkey_expansion_256b
.byte 102,15,58,223,202,16
call .Lkey_expansion_256a
.byte 102,15,58,223,200,16
call .Lkey_expansion_256b
.byte 102,15,58,223,202,32
call .Lkey_expansion_256a
.byte 102,15,58,223,200,32
call .Lkey_expansion_256b
.byte 102,15,58,223,202,64
call .Lkey_expansion_256a
movups %xmm0,(%rax)
movl %edx,16(%rax)
xorq %rax,%rax
jmp .Lenc_key_ret

.p2align 4
.Lbad_keybits:
movq $-2,%rax
.Lenc_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3
.LSEH_end_set_encrypt_key:
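# Key-schedule helpers: each one folds the AESKEYGENASSIST result in %xmm1
# into the previous round key (%xmm0, plus %xmm2 for the 192/256-bit
# variants) with the usual shuffle-and-xor pattern, storing finished round
# keys through %rax.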
.p2align 4
.Lkey_expansion_128:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_128_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3

.p2align 4
.Lkey_expansion_192a:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:
movaps %xmm2,%xmm5
.Lkey_expansion_192b_warm:
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
.byte 0xf3,0xc3

.p2align 4
.Lkey_expansion_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp .Lkey_expansion_192b_warm

.p2align 4
.Lkey_expansion_256a:
movups %xmm2,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3

.p2align 4
.Lkey_expansion_256b:
movups %xmm0,(%rax)
leaq 16(%rax),%rax

shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
.byte 0xf3,0xc3
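# Constant pool: byte-swap mask for big-endian counters, counter-increment
# vectors used by the CTR and CCM paths, the 0x87 reduction constant for XTS
# tweak multiplication, and the CRYPTOGAMS identification string.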
.p2align 6
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long 6,6,6,0
.Lincrement64:
.long 1,0,0,0
.Lxts_magic:
.long 0x87,0,1,0

.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
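# Win64 SEH handlers for the routines above.  Each one locates the stack
# frame for the faulting RIP, copies the %xmm6+ registers that the prologue
# saved back into the CONTEXT record where applicable (512(%r8) lands on its
# Xmm6 slot), and branches to .Lcommon_seh_tail.  The .long 0xa548f3fc chunks
# encode `cld; rep movsq'.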
.def ecb_se_handler; .scl 3; .type 32; .endef
.p2align 4
ecb_se_handler:
pushq %rsi
pushq %rdi
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushfq
subq $64,%rsp

movq 152(%r8),%rax

jmp .Lcommon_seh_tail

.def ccm64_se_handler; .scl 3; .type 32; .endef
.p2align 4
ccm64_se_handler:
pushq %rsi
pushq %rdi
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushfq
subq $64,%rsp

movq 120(%r8),%rax
movq 248(%r8),%rbx

movq 8(%r9),%rsi
movq 56(%r9),%r11

movl 0(%r11),%r10d
leaq (%rsi,%r10,1),%r10
cmpq %r10,%rbx
jb .Lcommon_seh_tail

movq 152(%r8),%rax

movl 4(%r11),%r10d
leaq (%rsi,%r10,1),%r10
cmpq %r10,%rbx
jae .Lcommon_seh_tail

leaq 0(%rax),%rsi
leaq 512(%r8),%rdi
movl $8,%ecx
.long 0xa548f3fc
leaq 88(%rax),%rax

jmp .Lcommon_seh_tail

.def ctr32_se_handler; .scl 3; .type 32; .endef
.p2align 4
ctr32_se_handler:
pushq %rsi
pushq %rdi
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushfq
subq $64,%rsp

movq 120(%r8),%rax
movq 248(%r8),%rbx

leaq .Lctr32_body(%rip),%r10
cmpq %r10,%rbx
jb .Lcommon_seh_tail

movq 152(%r8),%rax

leaq .Lctr32_ret(%rip),%r10
cmpq %r10,%rbx
jae .Lcommon_seh_tail

leaq 32(%rax),%rsi
leaq 512(%r8),%rdi
movl $20,%ecx
.long 0xa548f3fc
leaq 200(%rax),%rax

jmp .Lcommon_seh_tail

.def xts_se_handler; .scl 3; .type 32; .endef
.p2align 4
xts_se_handler:
pushq %rsi
pushq %rdi
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushfq
subq $64,%rsp

movq 120(%r8),%rax
movq 248(%r8),%rbx

movq 8(%r9),%rsi
movq 56(%r9),%r11

movl 0(%r11),%r10d
leaq (%rsi,%r10,1),%r10
cmpq %r10,%rbx
jb .Lcommon_seh_tail

movq 152(%r8),%rax

movl 4(%r11),%r10d
leaq (%rsi,%r10,1),%r10
cmpq %r10,%rbx
jae .Lcommon_seh_tail

leaq 96(%rax),%rsi
leaq 512(%r8),%rdi
movl $20,%ecx
.long 0xa548f3fc
leaq 104+160(%rax),%rax

jmp .Lcommon_seh_tail

.def cbc_se_handler; .scl 3; .type 32; .endef
.p2align 4
cbc_se_handler:
pushq %rsi
pushq %rdi
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushfq
subq $64,%rsp

movq 152(%r8),%rax
movq 248(%r8),%rbx

leaq .Lcbc_decrypt(%rip),%r10
cmpq %r10,%rbx
jb .Lcommon_seh_tail

leaq .Lcbc_decrypt_body(%rip),%r10
cmpq %r10,%rbx
jb .Lrestore_cbc_rax

leaq .Lcbc_ret(%rip),%r10
cmpq %r10,%rbx
jae .Lcommon_seh_tail

leaq 0(%rax),%rsi
leaq 512(%r8),%rdi
movl $8,%ecx
.long 0xa548f3fc
leaq 88(%rax),%rax
jmp .Lcommon_seh_tail

.Lrestore_cbc_rax:
movq 120(%r8),%rax
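# Common handler tail: recover the saved %rdi/%rsi, patch Rsp/Rsi/Rdi in the
# CONTEXT record, copy the whole CONTEXT into the dispatcher's ContextRecord,
# let RtlVirtualUnwind continue the unwind, and return 1
# (ExceptionContinueSearch).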
.Lcommon_seh_tail:
movq 8(%rax),%rdi
movq 16(%rax),%rsi
movq %rax,152(%r8)
movq %rsi,168(%r8)
movq %rdi,176(%r8)

movq 40(%r9),%rdi
movq %r8,%rsi
movl $154,%ecx
.long 0xa548f3fc

movq %r9,%rsi
xorq %rcx,%rcx
movq 8(%rsi),%rdx
movq 0(%rsi),%r8
movq 16(%rsi),%r9
movq 40(%rsi),%r10
leaq 56(%rsi),%r11
leaq 24(%rsi),%r12
movq %r10,32(%rsp)
movq %r11,40(%rsp)
movq %r12,48(%rsp)
movq %rcx,56(%rsp)
call *__imp_RtlVirtualUnwind(%rip)

movl $1,%eax
addq $64,%rsp
popfq
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
popq %rdi
popq %rsi
.byte 0xf3,0xc3
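# .pdata: RUNTIME_FUNCTION entries tying each routine's code range to its
# unwind information in .xdata below.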
.section .pdata
.p2align 2
.rva .LSEH_begin_aesni_ecb_encrypt
.rva .LSEH_end_aesni_ecb_encrypt
.rva .LSEH_info_ecb

.rva .LSEH_begin_aesni_ccm64_encrypt_blocks
.rva .LSEH_end_aesni_ccm64_encrypt_blocks
.rva .LSEH_info_ccm64_enc

.rva .LSEH_begin_aesni_ccm64_decrypt_blocks
.rva .LSEH_end_aesni_ccm64_decrypt_blocks
.rva .LSEH_info_ccm64_dec

.rva .LSEH_begin_aesni_ctr32_encrypt_blocks
.rva .LSEH_end_aesni_ctr32_encrypt_blocks
.rva .LSEH_info_ctr32

.rva .LSEH_begin_aesni_xts_encrypt
.rva .LSEH_end_aesni_xts_encrypt
.rva .LSEH_info_xts_enc

.rva .LSEH_begin_aesni_xts_decrypt
.rva .LSEH_end_aesni_xts_decrypt
.rva .LSEH_info_xts_dec
.rva .LSEH_begin_aesni_cbc_encrypt
.rva .LSEH_end_aesni_cbc_encrypt
.rva .LSEH_info_cbc

.rva aesni_set_decrypt_key
.rva .LSEH_end_set_decrypt_key
.rva .LSEH_info_key

.rva aesni_set_encrypt_key
.rva .LSEH_end_set_encrypt_key
.rva .LSEH_info_key
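# .xdata: UNWIND_INFO records.  The `.byte 9,0,0,0' form registers the
# exception handlers above; .LSEH_info_key simply describes the 8-byte stack
# allocation made by the key-setup prologues.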
.section .xdata
.p2align 3
.LSEH_info_ecb:
.byte 9,0,0,0
.rva ecb_se_handler
.LSEH_info_ccm64_enc:
.byte 9,0,0,0
.rva ccm64_se_handler
.rva .Lccm64_enc_body,.Lccm64_enc_ret
.LSEH_info_ccm64_dec:
.byte 9,0,0,0
.rva ccm64_se_handler
.rva .Lccm64_dec_body,.Lccm64_dec_ret
.LSEH_info_ctr32:
.byte 9,0,0,0
.rva ctr32_se_handler
.LSEH_info_xts_enc:
.byte 9,0,0,0
.rva xts_se_handler
.rva .Lxts_enc_body,.Lxts_enc_epilogue
.LSEH_info_xts_dec:
.byte 9,0,0,0
.rva xts_se_handler
.rva .Lxts_dec_body,.Lxts_dec_epilogue
.LSEH_info_cbc:
.byte 9,0,0,0
.rva cbc_se_handler
.LSEH_info_key:
.byte 0x01,0x04,0x01,0x00
.byte 0x04,0x02,0x00,0x00