# aesni-x86_64.s — AES-NI primitives for x86-64 (AT&T syntax, SysV AMD64 ABI).
# Machine-generated from OpenSSL's aesni-x86_64.pl perlasm script.
# (Origin: lib/libcrypto/asm/aesni-x86_64.s)
.text
#-----------------------------------------------------------------------
# void aesni_encrypt(const u8 *in, u8 *out, const AES_KEY *key)
# In:   %rdi = in (16-byte block), %rsi = out, %rdx = key schedule
#       240(%rdx) holds the round count (AES_KEY "rounds" slot).
# Encrypts a single block; wipes key/data registers before returning.
#-----------------------------------------------------------------------
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
	movups	(%rdi),%xmm2		# load plaintext block
	movl	240(%rdx),%eax		# eax = round count
	movups	(%rdx),%xmm0		# round-0 (whitening) key
	movups	16(%rdx),%xmm1		# round-1 key
	leaq	32(%rdx),%rdx		# advance past the two keys loaded
	xorps	%xmm0,%xmm2		# block ^= round-0 key
.Loop_enc1_1:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rdx),%xmm1		# next round key
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		# scrub key material from registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)		# store ciphertext
	pxor	%xmm2,%xmm2		# scrub data register
.byte	0xf3,0xc3			# rep ret
.size	aesni_encrypt,.-aesni_encrypt
#-----------------------------------------------------------------------
# void aesni_decrypt(const u8 *in, u8 *out, const AES_KEY *key)
# In:   %rdi = in (16-byte block), %rsi = out, %rdx = key schedule
# Mirror of aesni_encrypt using AESDEC/AESDECLAST.
#-----------------------------------------------------------------------
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
	movups	(%rdi),%xmm2		# load ciphertext block
	movl	240(%rdx),%eax		# eax = round count
	movups	(%rdx),%xmm0		# round-0 key
	movups	16(%rdx),%xmm1		# round-1 key
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2		# block ^= round-0 key
.Loop_dec1_2:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rdx),%xmm1		# next round key
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		# scrub key material
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)		# store plaintext
	pxor	%xmm2,%xmm2		# scrub data register
.byte	0xf3,0xc3			# rep ret
.size	aesni_decrypt,.-aesni_decrypt
#-----------------------------------------------------------------------
# _aesni_encrypt2: encrypt 2 blocks (%xmm2,%xmm3) in parallel.
# In:   %rcx = key schedule, %eax = round count (clobbered),
#       blocks pre-loaded in %xmm2,%xmm3.
# Rounds are interleaved across both blocks to hide AESENC latency.
# %rax is turned into a negative byte offset that counts up to 0,
# so the loop can fetch two round keys per iteration with one index.
#-----------------------------------------------------------------------
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16 (key-schedule bytes)
	movups	16(%rcx),%xmm1		# round-1 key
	xorps	%xmm0,%xmm2		# whiten both blocks
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0		# round-2 key
	leaq	32(%rcx,%rax,1),%rcx	# rcx -> just past schedule
	negq	%rax
	addq	$16,%rax		# rax = -(rounds-1)*16

.Lenc_loop2:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1	# next odd round key
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
	jnz	.Lenc_loop2

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	0xf3,0xc3			# rep ret
.size	_aesni_encrypt2,.-_aesni_encrypt2
#-----------------------------------------------------------------------
# _aesni_decrypt2: decrypt 2 blocks (%xmm2,%xmm3) in parallel.
# Same register contract and negative-offset key walk as _aesni_encrypt2,
# using AESDEC/AESDECLAST.
#-----------------------------------------------------------------------
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten both blocks
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop2:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop2

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	0xf3,0xc3			# rep ret
.size	_aesni_decrypt2,.-_aesni_decrypt2
#-----------------------------------------------------------------------
# _aesni_encrypt3: encrypt 3 blocks (%xmm2..%xmm4) in parallel.
# In: %rcx = key schedule, %eax = round count (clobbered).
#-----------------------------------------------------------------------
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten all three blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop3:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	0xf3,0xc3			# rep ret
.size	_aesni_encrypt3,.-_aesni_encrypt3
#-----------------------------------------------------------------------
# _aesni_decrypt3: decrypt 3 blocks (%xmm2..%xmm4) in parallel.
# In: %rcx = key schedule, %eax = round count (clobbered).
#-----------------------------------------------------------------------
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten all three blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop3:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop3

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	0xf3,0xc3			# rep ret
.size	_aesni_decrypt3,.-_aesni_decrypt3
#-----------------------------------------------------------------------
# _aesni_encrypt4: encrypt 4 blocks (%xmm2..%xmm5) in parallel.
# In: %rcx = key schedule, %eax = round count (clobbered).
#-----------------------------------------------------------------------
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten all four blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00			# nopl (%rax) — alignment padding
	addq	$16,%rax

.Lenc_loop4:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
.byte	0xf3,0xc3			# rep ret
.size	_aesni_encrypt4,.-_aesni_encrypt4
#-----------------------------------------------------------------------
# _aesni_decrypt4: decrypt 4 blocks (%xmm2..%xmm5) in parallel.
# In: %rcx = key schedule, %eax = round count (clobbered).
#-----------------------------------------------------------------------
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten all four blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00			# nopl (%rax) — alignment padding
	addq	$16,%rax

.Ldec_loop4:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop4

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232		# aesdeclast %xmm0,%xmm5
.byte	0xf3,0xc3			# rep ret
.size	_aesni_decrypt4,.-_aesni_decrypt4
#-----------------------------------------------------------------------
# _aesni_encrypt6: encrypt 6 blocks (%xmm2..%xmm7) in parallel.
# In: %rcx = key schedule, %eax = round count (clobbered).
# First round is interleaved with setup; the loop is entered at
# .Lenc_loop6_enter to skip the already-issued first three AESENCs.
#-----------------------------------------------------------------------
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten blocks, overlapped with
	pxor	%xmm0,%xmm3		#  the first round below
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.Lenc_loop6_enter:
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		# aesenc %xmm0,%xmm7
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2..%xmm7
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2..%xmm7
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	0xf3,0xc3			# rep ret
.size	_aesni_encrypt6,.-_aesni_encrypt6
#-----------------------------------------------------------------------
# _aesni_decrypt6: decrypt 6 blocks (%xmm2..%xmm7) in parallel.
# Same structure as _aesni_encrypt6, using AESDEC/AESDECLAST.
#-----------------------------------------------------------------------
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten blocks, overlapped with
	pxor	%xmm0,%xmm3		#  the first round below
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.Ldec_loop6_enter:
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240		# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248		# aesdec %xmm0,%xmm7
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop6

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2..%xmm7
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2..%xmm7
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
.byte	0xf3,0xc3			# rep ret
.size	_aesni_decrypt6,.-_aesni_decrypt6
#-----------------------------------------------------------------------
# _aesni_encrypt8: encrypt 8 blocks (%xmm2..%xmm9) in parallel.
# In: %rcx = key schedule, %eax = round count (clobbered).
# .byte sequences with the 0x68 (REX.R) prefix address %xmm8/%xmm9.
#-----------------------------------------------------------------------
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten all eight blocks,
	xorps	%xmm0,%xmm3		#  overlapped with first round
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.Lenc_loop8_inner:
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		# aesenc %xmm1,%xmm9
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		# aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		# aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		# aesenc %xmm0,%xmm9
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2..%xmm9
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2..%xmm9
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
.byte	0xf3,0xc3			# rep ret
.size	_aesni_encrypt8,.-_aesni_encrypt8
#-----------------------------------------------------------------------
# _aesni_decrypt8: decrypt 8 blocks (%xmm2..%xmm9) in parallel.
# Mirror of _aesni_encrypt8 using AESDEC/AESDECLAST.
#-----------------------------------------------------------------------
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
	movups	(%rcx),%xmm0		# round-0 key
	shll	$4,%eax			# eax = rounds*16
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2		# whiten all eight blocks,
	xorps	%xmm0,%xmm3		#  overlapped with first round
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.Ldec_loop8_inner:
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193		# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201		# aesdec %xmm1,%xmm9
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240		# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248		# aesdec %xmm0,%xmm7
.byte	102,68,15,56,222,192		# aesdec %xmm0,%xmm8
.byte	102,68,15,56,222,200		# aesdec %xmm0,%xmm9
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop8

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2..%xmm9
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2..%xmm9
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
.byte	102,68,15,56,223,192
.byte	102,68,15,56,223,200
.byte	0xf3,0xc3			# rep ret
.size	_aesni_decrypt8,.-_aesni_decrypt8
#-----------------------------------------------------------------------
# void aesni_ecb_encrypt(const u8 *in, u8 *out, size_t len,
#                        const AES_KEY *key, int enc)
# In:   %rdi=in, %rsi=out, %rdx=len (bytes), %rcx=key, %r8d=enc flag
#       (non-zero = encrypt, zero = decrypt).
# Processes 8 blocks per iteration via _aesni_{en,de}crypt8, then a
# 1..7-block tail.  %r11/%r10d cache key pointer / round count so the
# multi-block helpers (which clobber %rcx/%eax) can be re-entered.
# Decrypt paths scrub output registers after storing (encrypt paths
# handle public ciphertext and do not).
#-----------------------------------------------------------------------
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
	andq	$-16,%rdx		# round len down to whole blocks
	jz	.Lecb_ret

	movl	240(%rcx),%eax		# eax = round count
	movups	(%rcx),%xmm0
	movq	%rcx,%r11		# r11 = saved key pointer
	movl	%eax,%r10d		# r10d = saved round count
	testl	%r8d,%r8d
	jz	.Lecb_decrypt		# enc==0 -> decrypt path

	cmpq	$0x80,%rdx		# at least 8 blocks?
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2		# preload first 8 blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	movups	%xmm2,(%rsi)		# store previous batch while
	movq	%r11,%rcx		#  loading the next one
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8

	movups	%xmm2,(%rsi)		# flush last full batch
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx		# rdx = remaining tail bytes
	jz	.Lecb_ret

.Lecb_enc_tail:				# 1..7 remaining blocks
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	movdqu	96(%rdi),%xmm8		# 7 blocks: pad with zero block
	xorps	%xmm9,%xmm9
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:				# single block, inline round loop
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:				# 5 blocks via 6-wide helper
	xorps	%xmm7,%xmm7		#  with a zero pad block
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:				# --- decrypt path ---
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2		# preload first 8 blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8

	movups	%xmm2,(%rsi)		# flush last batch, scrubbing
	pxor	%xmm2,%xmm2		#  plaintext from registers
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx
	jz	.Lecb_ret

.Lecb_dec_tail:				# 1..7 remaining blocks
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8		# 7 blocks: pad with zero block
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:				# single block, inline round loop
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:				# 5 blocks via 6-wide helper
	xorps	%xmm7,%xmm7		#  with a zero pad block
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

.Lecb_ret:
	xorps	%xmm0,%xmm0		# scrub key registers on every exit
	pxor	%xmm1,%xmm1
.byte	0xf3,0xc3			# rep ret
.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
#-----------------------------------------------------------------------
# void aesni_ccm64_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                 const AES_KEY *key, const u8 *ivec,
#                                 u8 *cmac)
# In:   %rdi=in, %rsi=out, %rdx=block count, %rcx=key,
#       %r8=counter block, %r9=CMAC block (updated in place).
# Per block: encrypts the counter (%xmm2) and the running CMAC (%xmm3)
# in parallel with the 2-wide round loop; ciphertext = counter-stream ^
# plaintext, CMAC absorbs the plaintext via the round-0 whitening XOR.
# Uses .Lincrement64/.Lbswap_mask constants defined elsewhere in file.
#-----------------------------------------------------------------------
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
	movl	240(%rcx),%eax		# eax = round count
	movdqu	(%r8),%xmm6		# xmm6 = counter block (big-endian)
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	shll	$4,%eax			# eax = rounds*16
	movl	$16,%r10d
	leaq	0(%rcx),%r11		# r11 = saved key pointer
	movdqu	(%r9),%xmm3		# xmm3 = running CMAC
	movdqa	%xmm6,%xmm2
	leaq	32(%rcx,%rax,1),%rcx	# rcx -> end of key schedule
.byte	102,15,56,0,247			# pshufb %xmm7,%xmm6 (to LE for paddq)
	subq	%rax,%r10		# r10 = 16 - rounds*16 (loop bias)
	jmp	.Lccm64_enc_outer
.align	16
.Lccm64_enc_outer:
	movups	(%r11),%xmm0		# round-0 key
	movq	%r10,%rax
	movups	(%rdi),%xmm8		# load plaintext block

	xorps	%xmm0,%xmm2		# whiten counter
	movups	16(%r11),%xmm1
	xorps	%xmm8,%xmm0		# fold plaintext into key0...
	xorps	%xmm0,%xmm3		# ...so CMAC ^= key0 ^ plaintext
	movups	32(%r11),%xmm0

.Lccm64_enc2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_enc2_loop
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	paddq	%xmm9,%xmm6		# bump 64-bit counter
	decq	%rdx
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3

	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8		# ciphertext = keystream ^ plaintext
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
.byte	102,15,56,0,215			# pshufb %xmm7,%xmm2 (back to BE)
	leaq	16(%rsi),%rsi
	jnz	.Lccm64_enc_outer

	pxor	%xmm0,%xmm0		# scrub key and data registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)		# write back updated CMAC
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
.byte	0xf3,0xc3			# rep ret
.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
#-----------------------------------------------------------------------
# void aesni_ccm64_decrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                 const AES_KEY *key, const u8 *ivec,
#                                 u8 *cmac)
# Same arguments as the encrypt variant.  The first counter block is
# encrypted stand-alone; thereafter the next counter and the CMAC
# (absorbing recovered plaintext) run through the 2-wide round loop,
# and the final plaintext block is folded into the CMAC on its own
# after the loop breaks.
#-----------------------------------------------------------------------
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
	movl	240(%rcx),%eax		# eax = round count
	movups	(%r8),%xmm6		# xmm6 = counter block
	movdqu	(%r9),%xmm3		# xmm3 = running CMAC
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	movaps	%xmm6,%xmm2
	movl	%eax,%r10d		# r10d = saved round count
	movq	%rcx,%r11		# r11 = saved key pointer
.byte	102,15,56,0,247			# pshufb %xmm7,%xmm6 (to LE)
	movups	(%rcx),%xmm0		# encrypt first counter block inline
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_5:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	shll	$4,%r10d		# r10d = rounds*16
	movl	$16,%eax
	movups	(%rdi),%xmm8		# first ciphertext block
	paddq	%xmm9,%xmm6		# bump counter
	leaq	16(%rdi),%rdi
	subq	%r10,%rax		# rax = 16 - rounds*16 (loop bias)
	leaq	32(%r11,%r10,1),%rcx	# rcx -> end of key schedule
	movq	%rax,%r10
	jmp	.Lccm64_dec_outer
.align	16
.Lccm64_dec_outer:
	xorps	%xmm2,%xmm8		# plaintext = keystream ^ ciphertext
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)		# store plaintext
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215			# pshufb %xmm7,%xmm2 (back to BE)

	subq	$1,%rdx
	jz	.Lccm64_dec_break	# last block: finish CMAC below

	movups	(%r11),%xmm0		# round-0 key
	movq	%r10,%rax
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8		# fold plaintext into key0...
	xorps	%xmm0,%xmm2		# whiten next counter
	xorps	%xmm8,%xmm3		# ...CMAC ^= key0 ^ plaintext
	movups	32(%r11),%xmm0
	jmp	.Lccm64_dec2_loop
.align	16
.Lccm64_dec2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_dec2_loop
	movups	(%rdi),%xmm8		# next ciphertext block
	paddq	%xmm9,%xmm6		# bump counter
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	leaq	16(%rdi),%rdi
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:
					# fold final plaintext into CMAC
	movl	240(%r11),%eax		# reload round count
	movups	(%r11),%xmm0
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8		# key0 ^ plaintext
	leaq	32(%r11),%r11
	xorps	%xmm8,%xmm3		# CMAC ^= key0 ^ plaintext
.Loop_enc1_6:
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%eax
	movups	(%r11),%xmm1
	leaq	16(%r11),%r11
	jnz	.Loop_enc1_6
.byte	102,15,56,221,217		# aesenclast %xmm1,%xmm3
	pxor	%xmm0,%xmm0		# scrub key and data registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)		# write back updated CMAC
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
.byte	0xf3,0xc3			# rep ret
.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
994 .globl aesni_ctr32_encrypt_blocks
995 .type aesni_ctr32_encrypt_blocks,@function
996 .align 16
997 aesni_ctr32_encrypt_blocks:
998 cmpq $1,%rdx
999 jne .Lctr32_bulk
1003 movups (%r8),%xmm2
1004 movups (%rdi),%xmm3
1005 movl 240(%rcx),%edx
1006 movups (%rcx),%xmm0
1007 movups 16(%rcx),%xmm1
1008 leaq 32(%rcx),%rcx
1009 xorps %xmm0,%xmm2
1010 .Loop_enc1_7:
1011 .byte 102,15,56,220,209
1012 decl %edx
1013 movups (%rcx),%xmm1
1014 leaq 16(%rcx),%rcx
1015 jnz .Loop_enc1_7
1016 .byte 102,15,56,221,209
1017 pxor %xmm0,%xmm0
1018 pxor %xmm1,%xmm1
1019 xorps %xmm3,%xmm2
1020 pxor %xmm3,%xmm3
1021 movups %xmm2,(%rsi)
1022 xorps %xmm2,%xmm2
1023 jmp .Lctr32_epilogue
1025 .align 16
1026 .Lctr32_bulk:
1027 leaq (%rsp),%rax
1028 pushq %rbp
1029 subq $128,%rsp
1030 andq $-16,%rsp
1031 leaq -8(%rax),%rbp
1036 movdqu (%r8),%xmm2
1037 movdqu (%rcx),%xmm0
1038 movl 12(%r8),%r8d
1039 pxor %xmm0,%xmm2
1040 movl 12(%rcx),%r11d
1041 movdqa %xmm2,0(%rsp)
1042 bswapl %r8d
1043 movdqa %xmm2,%xmm3
1044 movdqa %xmm2,%xmm4
1045 movdqa %xmm2,%xmm5
1046 movdqa %xmm2,64(%rsp)
1047 movdqa %xmm2,80(%rsp)
1048 movdqa %xmm2,96(%rsp)
1049 movq %rdx,%r10
1050 movdqa %xmm2,112(%rsp)
1052 leaq 1(%r8),%rax
1053 leaq 2(%r8),%rdx
1054 bswapl %eax
1055 bswapl %edx
1056 xorl %r11d,%eax
1057 xorl %r11d,%edx
1058 .byte 102,15,58,34,216,3
1059 leaq 3(%r8),%rax
1060 movdqa %xmm3,16(%rsp)
1061 .byte 102,15,58,34,226,3
1062 bswapl %eax
1063 movq %r10,%rdx
1064 leaq 4(%r8),%r10
1065 movdqa %xmm4,32(%rsp)
1066 xorl %r11d,%eax
1067 bswapl %r10d
1068 .byte 102,15,58,34,232,3
1069 xorl %r11d,%r10d
1070 movdqa %xmm5,48(%rsp)
1071 leaq 5(%r8),%r9
1072 movl %r10d,64+12(%rsp)
1073 bswapl %r9d
1074 leaq 6(%r8),%r10
1075 movl 240(%rcx),%eax
1076 xorl %r11d,%r9d
1077 bswapl %r10d
1078 movl %r9d,80+12(%rsp)
1079 xorl %r11d,%r10d
1080 leaq 7(%r8),%r9
1081 movl %r10d,96+12(%rsp)
1082 bswapl %r9d
1083 movl OPENSSL_ia32cap_P+4(%rip),%r10d
1084 xorl %r11d,%r9d
1085 andl $71303168,%r10d
1086 movl %r9d,112+12(%rsp)
1088 movups 16(%rcx),%xmm1
1090 movdqa 64(%rsp),%xmm6
1091 movdqa 80(%rsp),%xmm7
1093 cmpq $8,%rdx
1094 jb .Lctr32_tail
1096 subq $6,%rdx
1097 cmpl $4194304,%r10d
1098 je .Lctr32_6x
1100 leaq 128(%rcx),%rcx
1101 subq $2,%rdx
1102 jmp .Lctr32_loop8
1104 .align 16
1105 .Lctr32_6x:
1106 shll $4,%eax
1107 movl $48,%r10d
1108 bswapl %r11d
1109 leaq 32(%rcx,%rax,1),%rcx
1110 subq %rax,%r10
1111 jmp .Lctr32_loop6
1113 .align 16
1114 .Lctr32_loop6:
1115 addl $6,%r8d
1116 movups -48(%rcx,%r10,1),%xmm0
1117 .byte 102,15,56,220,209
1118 movl %r8d,%eax
1119 xorl %r11d,%eax
1120 .byte 102,15,56,220,217
1121 .byte 0x0f,0x38,0xf1,0x44,0x24,12
1122 leal 1(%r8),%eax
1123 .byte 102,15,56,220,225
1124 xorl %r11d,%eax
1125 .byte 0x0f,0x38,0xf1,0x44,0x24,28
1126 .byte 102,15,56,220,233
1127 leal 2(%r8),%eax
1128 xorl %r11d,%eax
1129 .byte 102,15,56,220,241
1130 .byte 0x0f,0x38,0xf1,0x44,0x24,44
1131 leal 3(%r8),%eax
1132 .byte 102,15,56,220,249
1133 movups -32(%rcx,%r10,1),%xmm1
1134 xorl %r11d,%eax
1136 .byte 102,15,56,220,208
1137 .byte 0x0f,0x38,0xf1,0x44,0x24,60
1138 leal 4(%r8),%eax
1139 .byte 102,15,56,220,216
1140 xorl %r11d,%eax
1141 .byte 0x0f,0x38,0xf1,0x44,0x24,76
1142 .byte 102,15,56,220,224
1143 leal 5(%r8),%eax
1144 xorl %r11d,%eax
1145 .byte 102,15,56,220,232
1146 .byte 0x0f,0x38,0xf1,0x44,0x24,92
1147 movq %r10,%rax
1148 .byte 102,15,56,220,240
1149 .byte 102,15,56,220,248
1150 movups -16(%rcx,%r10,1),%xmm0
1152 call .Lenc_loop6
1154 movdqu (%rdi),%xmm8
1155 movdqu 16(%rdi),%xmm9
1156 movdqu 32(%rdi),%xmm10
1157 movdqu 48(%rdi),%xmm11
1158 movdqu 64(%rdi),%xmm12
1159 movdqu 80(%rdi),%xmm13
1160 leaq 96(%rdi),%rdi
1161 movups -64(%rcx,%r10,1),%xmm1
1162 pxor %xmm2,%xmm8
1163 movaps 0(%rsp),%xmm2
1164 pxor %xmm3,%xmm9
1165 movaps 16(%rsp),%xmm3
1166 pxor %xmm4,%xmm10
1167 movaps 32(%rsp),%xmm4
1168 pxor %xmm5,%xmm11
1169 movaps 48(%rsp),%xmm5
1170 pxor %xmm6,%xmm12
1171 movaps 64(%rsp),%xmm6
1172 pxor %xmm7,%xmm13
1173 movaps 80(%rsp),%xmm7
1174 movdqu %xmm8,(%rsi)
1175 movdqu %xmm9,16(%rsi)
1176 movdqu %xmm10,32(%rsi)
1177 movdqu %xmm11,48(%rsi)
1178 movdqu %xmm12,64(%rsi)
1179 movdqu %xmm13,80(%rsi)
1180 leaq 96(%rsi),%rsi
1182 subq $6,%rdx
1183 jnc .Lctr32_loop6
1185 addq $6,%rdx
1186 jz .Lctr32_done
1188 leal -48(%r10),%eax
1189 leaq -80(%rcx,%r10,1),%rcx
1190 negl %eax
1191 shrl $4,%eax
1192 jmp .Lctr32_tail
1194 .align 32
1195 .Lctr32_loop8:
1196 addl $8,%r8d
1197 movdqa 96(%rsp),%xmm8
1198 .byte 102,15,56,220,209
1199 movl %r8d,%r9d
1200 movdqa 112(%rsp),%xmm9
1201 .byte 102,15,56,220,217
1202 bswapl %r9d
1203 movups 32-128(%rcx),%xmm0
1204 .byte 102,15,56,220,225
1205 xorl %r11d,%r9d
1207 .byte 102,15,56,220,233
1208 movl %r9d,0+12(%rsp)
1209 leaq 1(%r8),%r9
1210 .byte 102,15,56,220,241
1211 .byte 102,15,56,220,249
1212 .byte 102,68,15,56,220,193
1213 .byte 102,68,15,56,220,201
1214 movups 48-128(%rcx),%xmm1
1215 bswapl %r9d
1216 .byte 102,15,56,220,208
1217 .byte 102,15,56,220,216
1218 xorl %r11d,%r9d
1219 .byte 0x66,0x90
1220 .byte 102,15,56,220,224
1221 .byte 102,15,56,220,232
1222 movl %r9d,16+12(%rsp)
1223 leaq 2(%r8),%r9
1224 .byte 102,15,56,220,240
1225 .byte 102,15,56,220,248
1226 .byte 102,68,15,56,220,192
1227 .byte 102,68,15,56,220,200
1228 movups 64-128(%rcx),%xmm0
1229 bswapl %r9d
1230 .byte 102,15,56,220,209
1231 .byte 102,15,56,220,217
1232 xorl %r11d,%r9d
1233 .byte 0x66,0x90
1234 .byte 102,15,56,220,225
1235 .byte 102,15,56,220,233
1236 movl %r9d,32+12(%rsp)
1237 leaq 3(%r8),%r9
1238 .byte 102,15,56,220,241
1239 .byte 102,15,56,220,249
1240 .byte 102,68,15,56,220,193
1241 .byte 102,68,15,56,220,201
1242 movups 80-128(%rcx),%xmm1
1243 bswapl %r9d
1244 .byte 102,15,56,220,208
1245 .byte 102,15,56,220,216
1246 xorl %r11d,%r9d
1247 .byte 0x66,0x90
1248 .byte 102,15,56,220,224
1249 .byte 102,15,56,220,232
1250 movl %r9d,48+12(%rsp)
1251 leaq 4(%r8),%r9
1252 .byte 102,15,56,220,240
1253 .byte 102,15,56,220,248
1254 .byte 102,68,15,56,220,192
1255 .byte 102,68,15,56,220,200
1256 movups 96-128(%rcx),%xmm0
1257 bswapl %r9d
1258 .byte 102,15,56,220,209
1259 .byte 102,15,56,220,217
1260 xorl %r11d,%r9d
1261 .byte 0x66,0x90
1262 .byte 102,15,56,220,225
1263 .byte 102,15,56,220,233
1264 movl %r9d,64+12(%rsp)
1265 leaq 5(%r8),%r9
1266 .byte 102,15,56,220,241
1267 .byte 102,15,56,220,249
1268 .byte 102,68,15,56,220,193
1269 .byte 102,68,15,56,220,201
1270 movups 112-128(%rcx),%xmm1
1271 bswapl %r9d
1272 .byte 102,15,56,220,208
1273 .byte 102,15,56,220,216
1274 xorl %r11d,%r9d
1275 .byte 0x66,0x90
1276 .byte 102,15,56,220,224
1277 .byte 102,15,56,220,232
1278 movl %r9d,80+12(%rsp)
1279 leaq 6(%r8),%r9
1280 .byte 102,15,56,220,240
1281 .byte 102,15,56,220,248
1282 .byte 102,68,15,56,220,192
1283 .byte 102,68,15,56,220,200
1284 movups 128-128(%rcx),%xmm0
1285 bswapl %r9d
1286 .byte 102,15,56,220,209
1287 .byte 102,15,56,220,217
1288 xorl %r11d,%r9d
1289 .byte 0x66,0x90
1290 .byte 102,15,56,220,225
1291 .byte 102,15,56,220,233
1292 movl %r9d,96+12(%rsp)
1293 leaq 7(%r8),%r9
1294 .byte 102,15,56,220,241
1295 .byte 102,15,56,220,249
1296 .byte 102,68,15,56,220,193
1297 .byte 102,68,15,56,220,201
1298 movups 144-128(%rcx),%xmm1
1299 bswapl %r9d
1300 .byte 102,15,56,220,208
1301 .byte 102,15,56,220,216
1302 .byte 102,15,56,220,224
1303 xorl %r11d,%r9d
1304 movdqu 0(%rdi),%xmm10
1305 .byte 102,15,56,220,232
1306 movl %r9d,112+12(%rsp)
1307 cmpl $11,%eax
1308 .byte 102,15,56,220,240
1309 .byte 102,15,56,220,248
1310 .byte 102,68,15,56,220,192
1311 .byte 102,68,15,56,220,200
1312 movups 160-128(%rcx),%xmm0
1314 jb .Lctr32_enc_done
1316 .byte 102,15,56,220,209
1317 .byte 102,15,56,220,217
1318 .byte 102,15,56,220,225
1319 .byte 102,15,56,220,233
1320 .byte 102,15,56,220,241
1321 .byte 102,15,56,220,249
1322 .byte 102,68,15,56,220,193
1323 .byte 102,68,15,56,220,201
1324 movups 176-128(%rcx),%xmm1
1326 .byte 102,15,56,220,208
1327 .byte 102,15,56,220,216
1328 .byte 102,15,56,220,224
1329 .byte 102,15,56,220,232
1330 .byte 102,15,56,220,240
1331 .byte 102,15,56,220,248
1332 .byte 102,68,15,56,220,192
1333 .byte 102,68,15,56,220,200
1334 movups 192-128(%rcx),%xmm0
1335 je .Lctr32_enc_done
1337 .byte 102,15,56,220,209
1338 .byte 102,15,56,220,217
1339 .byte 102,15,56,220,225
1340 .byte 102,15,56,220,233
1341 .byte 102,15,56,220,241
1342 .byte 102,15,56,220,249
1343 .byte 102,68,15,56,220,193
1344 .byte 102,68,15,56,220,201
1345 movups 208-128(%rcx),%xmm1
1347 .byte 102,15,56,220,208
1348 .byte 102,15,56,220,216
1349 .byte 102,15,56,220,224
1350 .byte 102,15,56,220,232
1351 .byte 102,15,56,220,240
1352 .byte 102,15,56,220,248
1353 .byte 102,68,15,56,220,192
1354 .byte 102,68,15,56,220,200
1355 movups 224-128(%rcx),%xmm0
1356 jmp .Lctr32_enc_done
1358 .align 16
1359 .Lctr32_enc_done:
1360 movdqu 16(%rdi),%xmm11
1361 pxor %xmm0,%xmm10
1362 movdqu 32(%rdi),%xmm12
1363 pxor %xmm0,%xmm11
1364 movdqu 48(%rdi),%xmm13
1365 pxor %xmm0,%xmm12
1366 movdqu 64(%rdi),%xmm14
1367 pxor %xmm0,%xmm13
1368 movdqu 80(%rdi),%xmm15
1369 pxor %xmm0,%xmm14
1370 pxor %xmm0,%xmm15
1371 .byte 102,15,56,220,209
1372 .byte 102,15,56,220,217
1373 .byte 102,15,56,220,225
1374 .byte 102,15,56,220,233
1375 .byte 102,15,56,220,241
1376 .byte 102,15,56,220,249
1377 .byte 102,68,15,56,220,193
1378 .byte 102,68,15,56,220,201
1379 movdqu 96(%rdi),%xmm1
1380 leaq 128(%rdi),%rdi
1382 .byte 102,65,15,56,221,210
1383 pxor %xmm0,%xmm1
1384 movdqu 112-128(%rdi),%xmm10
1385 .byte 102,65,15,56,221,219
1386 pxor %xmm0,%xmm10
1387 movdqa 0(%rsp),%xmm11
1388 .byte 102,65,15,56,221,228
1389 .byte 102,65,15,56,221,237
1390 movdqa 16(%rsp),%xmm12
1391 movdqa 32(%rsp),%xmm13
1392 .byte 102,65,15,56,221,246
1393 .byte 102,65,15,56,221,255
1394 movdqa 48(%rsp),%xmm14
1395 movdqa 64(%rsp),%xmm15
1396 .byte 102,68,15,56,221,193
1397 movdqa 80(%rsp),%xmm0
1398 movups 16-128(%rcx),%xmm1
1399 .byte 102,69,15,56,221,202
1401 movups %xmm2,(%rsi)
1402 movdqa %xmm11,%xmm2
1403 movups %xmm3,16(%rsi)
1404 movdqa %xmm12,%xmm3
1405 movups %xmm4,32(%rsi)
1406 movdqa %xmm13,%xmm4
1407 movups %xmm5,48(%rsi)
1408 movdqa %xmm14,%xmm5
1409 movups %xmm6,64(%rsi)
1410 movdqa %xmm15,%xmm6
1411 movups %xmm7,80(%rsi)
1412 movdqa %xmm0,%xmm7
1413 movups %xmm8,96(%rsi)
1414 movups %xmm9,112(%rsi)
1415 leaq 128(%rsi),%rsi
1417 subq $8,%rdx
1418 jnc .Lctr32_loop8
1420 addq $8,%rdx
1421 jz .Lctr32_done
1422 leaq -128(%rcx),%rcx
1424 .Lctr32_tail:
1427 leaq 16(%rcx),%rcx
1428 cmpq $4,%rdx
1429 jb .Lctr32_loop3
1430 je .Lctr32_loop4
1433 shll $4,%eax
1434 movdqa 96(%rsp),%xmm8
1435 pxor %xmm9,%xmm9
1437 movups 16(%rcx),%xmm0
1438 .byte 102,15,56,220,209
1439 .byte 102,15,56,220,217
1440 leaq 32-16(%rcx,%rax,1),%rcx
1441 negq %rax
1442 .byte 102,15,56,220,225
1443 addq $16,%rax
1444 movups (%rdi),%xmm10
1445 .byte 102,15,56,220,233
1446 .byte 102,15,56,220,241
1447 movups 16(%rdi),%xmm11
1448 movups 32(%rdi),%xmm12
1449 .byte 102,15,56,220,249
1450 .byte 102,68,15,56,220,193
1452 call .Lenc_loop8_enter
1454 movdqu 48(%rdi),%xmm13
1455 pxor %xmm10,%xmm2
1456 movdqu 64(%rdi),%xmm10
1457 pxor %xmm11,%xmm3
1458 movdqu %xmm2,(%rsi)
1459 pxor %xmm12,%xmm4
1460 movdqu %xmm3,16(%rsi)
1461 pxor %xmm13,%xmm5
1462 movdqu %xmm4,32(%rsi)
1463 pxor %xmm10,%xmm6
1464 movdqu %xmm5,48(%rsi)
1465 movdqu %xmm6,64(%rsi)
1466 cmpq $6,%rdx
1467 jb .Lctr32_done
1469 movups 80(%rdi),%xmm11
1470 xorps %xmm11,%xmm7
1471 movups %xmm7,80(%rsi)
1472 je .Lctr32_done
1474 movups 96(%rdi),%xmm12
1475 xorps %xmm12,%xmm8
1476 movups %xmm8,96(%rsi)
1477 jmp .Lctr32_done
1479 .align 32
1480 .Lctr32_loop4:
1481 .byte 102,15,56,220,209
1482 leaq 16(%rcx),%rcx
1483 decl %eax
1484 .byte 102,15,56,220,217
1485 .byte 102,15,56,220,225
1486 .byte 102,15,56,220,233
1487 movups (%rcx),%xmm1
1488 jnz .Lctr32_loop4
1489 .byte 102,15,56,221,209
1490 .byte 102,15,56,221,217
1491 movups (%rdi),%xmm10
1492 movups 16(%rdi),%xmm11
1493 .byte 102,15,56,221,225
1494 .byte 102,15,56,221,233
1495 movups 32(%rdi),%xmm12
1496 movups 48(%rdi),%xmm13
1498 xorps %xmm10,%xmm2
1499 movups %xmm2,(%rsi)
1500 xorps %xmm11,%xmm3
1501 movups %xmm3,16(%rsi)
1502 pxor %xmm12,%xmm4
1503 movdqu %xmm4,32(%rsi)
1504 pxor %xmm13,%xmm5
1505 movdqu %xmm5,48(%rsi)
1506 jmp .Lctr32_done
1508 .align 32
1509 .Lctr32_loop3:
1510 .byte 102,15,56,220,209
1511 leaq 16(%rcx),%rcx
1512 decl %eax
1513 .byte 102,15,56,220,217
1514 .byte 102,15,56,220,225
1515 movups (%rcx),%xmm1
1516 jnz .Lctr32_loop3
1517 .byte 102,15,56,221,209
1518 .byte 102,15,56,221,217
1519 .byte 102,15,56,221,225
1521 movups (%rdi),%xmm10
1522 xorps %xmm10,%xmm2
1523 movups %xmm2,(%rsi)
1524 cmpq $2,%rdx
1525 jb .Lctr32_done
1527 movups 16(%rdi),%xmm11
1528 xorps %xmm11,%xmm3
1529 movups %xmm3,16(%rsi)
1530 je .Lctr32_done
1532 movups 32(%rdi),%xmm12
1533 xorps %xmm12,%xmm4
1534 movups %xmm4,32(%rsi)
1536 .Lctr32_done:
1537 xorps %xmm0,%xmm0
1538 xorl %r11d,%r11d
1539 pxor %xmm1,%xmm1
1540 pxor %xmm2,%xmm2
1541 pxor %xmm3,%xmm3
1542 pxor %xmm4,%xmm4
1543 pxor %xmm5,%xmm5
1544 pxor %xmm6,%xmm6
1545 pxor %xmm7,%xmm7
1546 movaps %xmm0,0(%rsp)
1547 pxor %xmm8,%xmm8
1548 movaps %xmm0,16(%rsp)
1549 pxor %xmm9,%xmm9
1550 movaps %xmm0,32(%rsp)
1551 pxor %xmm10,%xmm10
1552 movaps %xmm0,48(%rsp)
1553 pxor %xmm11,%xmm11
1554 movaps %xmm0,64(%rsp)
1555 pxor %xmm12,%xmm12
1556 movaps %xmm0,80(%rsp)
1557 pxor %xmm13,%xmm13
1558 movaps %xmm0,96(%rsp)
1559 pxor %xmm14,%xmm14
1560 movaps %xmm0,112(%rsp)
1561 pxor %xmm15,%xmm15
1562 leaq (%rbp),%rsp
1563 popq %rbp
1564 .Lctr32_epilogue:
1565 .byte 0xf3,0xc3
1566 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
# ---------------------------------------------------------------------------
# aesni_xts_encrypt — AES-XTS encryption using AES-NI (generated code;
# AT&T syntax, SysV AMD64 ABI).
#
# C-equivalent signature (OpenSSL convention):
#   void aesni_xts_encrypt(const unsigned char *inp,  /* %rdi */
#                          unsigned char *out,         /* %rsi */
#                          size_t len,                 /* %rdx */
#                          const AES_KEY *key1,        /* %rcx  data key   */
#                          const AES_KEY *key2,        /* %r8   tweak key  */
#                          const unsigned char iv[16]) /* %r9   initial tweak */
#
# The .byte sequences are AES-NI instructions emitted as raw opcodes for
# old assemblers: 102,15,56,220,x = aesenc; 102,15,56,221,x = aesenclast
# (66 0F 38 DC / 66 0F 38 DD).  .byte 0xf3,0xc3 at the end is `rep ret`.
# NOTE(review): the leading decimal on each line is extraction residue
# from the blob view, not part of the instruction text.
1567 .globl aesni_xts_encrypt
1568 .type aesni_xts_encrypt,@function
1569 .align 16
1570 aesni_xts_encrypt:
# Prologue: carve a 112-byte scratch frame, align %rsp to 16; %rbp keeps
# the caller frame anchor (restored via leaq (%rbp),%rsp at the end).
1571 leaq (%rsp),%rax
1572 pushq %rbp
1573 subq $112,%rsp
1574 andq $-16,%rsp
1575 leaq -8(%rax),%rbp
# Encrypt the tweak: xmm2 = AES-enc(key2, iv).  240(%r8)/240(%rcx) hold
# the round counts (used below as loop counters), per OpenSSL AES_KEY
# layout.
1576 movups (%r9),%xmm2
1577 movl 240(%r8),%eax
1578 movl 240(%rcx),%r10d
1579 movups (%r8),%xmm0
1580 movups 16(%r8),%xmm1
1581 leaq 32(%r8),%r8
1582 xorps %xmm0,%xmm2
1583 .Loop_enc1_8:
1584 .byte 102,15,56,220,209
1585 decl %eax
1586 movups (%r8),%xmm1
1587 leaq 16(%r8),%r8
1588 jnz .Loop_enc1_8
1589 .byte 102,15,56,221,209
# Save data-key pointer (%r11), rounds (%eax), rounds*16 (%r10d); %r9
# keeps the original length so the 16-byte remainder can be stolen later.
1590 movups (%rcx),%xmm0
1591 movq %rcx,%r11
1592 movl %r10d,%eax
1593 shll $4,%r10d
1594 movq %rdx,%r9
1595 andq $-16,%rdx
1597 movups 16(%rcx,%r10,1),%xmm1
# Derive tweaks for 6 blocks (xmm10..xmm14, next in xmm15) by doubling in
# GF(2^128): paddq doubles, psrad/pand with .Lxts_magic builds the
# conditional reduction mask from the shuffled top bits in xmm9.
1599 movdqa .Lxts_magic(%rip),%xmm8
1600 movdqa %xmm2,%xmm15
1601 pshufd $0x5f,%xmm2,%xmm9
1602 pxor %xmm0,%xmm1
1603 movdqa %xmm9,%xmm14
1604 paddd %xmm9,%xmm9
1605 movdqa %xmm15,%xmm10
1606 psrad $31,%xmm14
1607 paddq %xmm15,%xmm15
1608 pand %xmm8,%xmm14
1609 pxor %xmm0,%xmm10
1610 pxor %xmm14,%xmm15
1611 movdqa %xmm9,%xmm14
1612 paddd %xmm9,%xmm9
1613 movdqa %xmm15,%xmm11
1614 psrad $31,%xmm14
1615 paddq %xmm15,%xmm15
1616 pand %xmm8,%xmm14
1617 pxor %xmm0,%xmm11
1618 pxor %xmm14,%xmm15
1619 movdqa %xmm9,%xmm14
1620 paddd %xmm9,%xmm9
1621 movdqa %xmm15,%xmm12
1622 psrad $31,%xmm14
1623 paddq %xmm15,%xmm15
1624 pand %xmm8,%xmm14
1625 pxor %xmm0,%xmm12
1626 pxor %xmm14,%xmm15
1627 movdqa %xmm9,%xmm14
1628 paddd %xmm9,%xmm9
1629 movdqa %xmm15,%xmm13
1630 psrad $31,%xmm14
1631 paddq %xmm15,%xmm15
1632 pand %xmm8,%xmm14
1633 pxor %xmm0,%xmm13
1634 pxor %xmm14,%xmm15
1635 movdqa %xmm15,%xmm14
1636 psrad $31,%xmm9
1637 paddq %xmm15,%xmm15
1638 pand %xmm8,%xmm9
1639 pxor %xmm0,%xmm14
1640 pxor %xmm9,%xmm15
1641 movaps %xmm1,96(%rsp)
# Fewer than 6 whole blocks (96 bytes)? take the short path.
1643 subq $96,%rdx
1644 jc .Lxts_enc_short
1646 movl $16+96,%eax
1647 leaq 32(%r11,%r10,1),%rcx
1648 subq %r10,%rax
1649 movups 16(%r11),%xmm1
1650 movq %rax,%r10
1651 leaq .Lxts_magic(%rip),%r8
1652 jmp .Lxts_enc_grandloop
1654 .align 32
# Main loop: encrypt 6 blocks (96 bytes) per iteration, interleaving AES
# rounds with computation of the next 6 tweaks (spilled to 0..80(%rsp)).
1655 .Lxts_enc_grandloop:
1656 movdqu 0(%rdi),%xmm2
1657 movdqa %xmm0,%xmm8
1658 movdqu 16(%rdi),%xmm3
1659 pxor %xmm10,%xmm2
1660 movdqu 32(%rdi),%xmm4
1661 pxor %xmm11,%xmm3
1662 .byte 102,15,56,220,209
1663 movdqu 48(%rdi),%xmm5
1664 pxor %xmm12,%xmm4
1665 .byte 102,15,56,220,217
1666 movdqu 64(%rdi),%xmm6
1667 pxor %xmm13,%xmm5
1668 .byte 102,15,56,220,225
1669 movdqu 80(%rdi),%xmm7
1670 pxor %xmm15,%xmm8
1671 movdqa 96(%rsp),%xmm9
1672 pxor %xmm14,%xmm6
1673 .byte 102,15,56,220,233
1674 movups 32(%r11),%xmm0
1675 leaq 96(%rdi),%rdi
1676 pxor %xmm8,%xmm7
# Pre-whiten the saved tweaks with the last round key (xmm9) so the final
# aesenclast can read them directly from the stack (see 221,84,36,...).
1678 pxor %xmm9,%xmm10
1679 .byte 102,15,56,220,241
1680 pxor %xmm9,%xmm11
1681 movdqa %xmm10,0(%rsp)
1682 .byte 102,15,56,220,249
1683 movups 48(%r11),%xmm1
1684 pxor %xmm9,%xmm12
1686 .byte 102,15,56,220,208
1687 pxor %xmm9,%xmm13
1688 movdqa %xmm11,16(%rsp)
1689 .byte 102,15,56,220,216
1690 pxor %xmm9,%xmm14
1691 movdqa %xmm12,32(%rsp)
1692 .byte 102,15,56,220,224
1693 .byte 102,15,56,220,232
1694 pxor %xmm9,%xmm8
1695 movdqa %xmm14,64(%rsp)
1696 .byte 102,15,56,220,240
1697 .byte 102,15,56,220,248
1698 movups 64(%r11),%xmm0
1699 movdqa %xmm8,80(%rsp)
1700 pshufd $0x5f,%xmm15,%xmm9
1701 jmp .Lxts_enc_loop6
1702 .align 32
# Inner round loop over the middle of the key schedule; %rax counts key
# bytes (jnz falls out of the addq above when it wraps to zero).
1703 .Lxts_enc_loop6:
1704 .byte 102,15,56,220,209
1705 .byte 102,15,56,220,217
1706 .byte 102,15,56,220,225
1707 .byte 102,15,56,220,233
1708 .byte 102,15,56,220,241
1709 .byte 102,15,56,220,249
1710 movups -64(%rcx,%rax,1),%xmm1
1711 addq $32,%rax
1713 .byte 102,15,56,220,208
1714 .byte 102,15,56,220,216
1715 .byte 102,15,56,220,224
1716 .byte 102,15,56,220,232
1717 .byte 102,15,56,220,240
1718 .byte 102,15,56,220,248
1719 movups -80(%rcx,%rax,1),%xmm0
1720 jnz .Lxts_enc_loop6
# Final rounds, fused with derivation of the next iteration's 6 tweaks.
1722 movdqa (%r8),%xmm8
1723 movdqa %xmm9,%xmm14
1724 paddd %xmm9,%xmm9
1725 .byte 102,15,56,220,209
1726 paddq %xmm15,%xmm15
1727 psrad $31,%xmm14
1728 .byte 102,15,56,220,217
1729 pand %xmm8,%xmm14
1730 movups (%r11),%xmm10
1731 .byte 102,15,56,220,225
1732 .byte 102,15,56,220,233
1733 .byte 102,15,56,220,241
1734 pxor %xmm14,%xmm15
1735 movaps %xmm10,%xmm11
1736 .byte 102,15,56,220,249
1737 movups -64(%rcx),%xmm1
1739 movdqa %xmm9,%xmm14
1740 .byte 102,15,56,220,208
1741 paddd %xmm9,%xmm9
1742 pxor %xmm15,%xmm10
1743 .byte 102,15,56,220,216
1744 psrad $31,%xmm14
1745 paddq %xmm15,%xmm15
1746 .byte 102,15,56,220,224
1747 .byte 102,15,56,220,232
1748 pand %xmm8,%xmm14
1749 movaps %xmm11,%xmm12
1750 .byte 102,15,56,220,240
1751 pxor %xmm14,%xmm15
1752 movdqa %xmm9,%xmm14
1753 .byte 102,15,56,220,248
1754 movups -48(%rcx),%xmm0
1756 paddd %xmm9,%xmm9
1757 .byte 102,15,56,220,209
1758 pxor %xmm15,%xmm11
1759 psrad $31,%xmm14
1760 .byte 102,15,56,220,217
1761 paddq %xmm15,%xmm15
1762 pand %xmm8,%xmm14
1763 .byte 102,15,56,220,225
1764 .byte 102,15,56,220,233
1765 movdqa %xmm13,48(%rsp)
1766 pxor %xmm14,%xmm15
1767 .byte 102,15,56,220,241
1768 movaps %xmm12,%xmm13
1769 movdqa %xmm9,%xmm14
1770 .byte 102,15,56,220,249
1771 movups -32(%rcx),%xmm1
1773 paddd %xmm9,%xmm9
1774 .byte 102,15,56,220,208
1775 pxor %xmm15,%xmm12
1776 psrad $31,%xmm14
1777 .byte 102,15,56,220,216
1778 paddq %xmm15,%xmm15
1779 pand %xmm8,%xmm14
1780 .byte 102,15,56,220,224
1781 .byte 102,15,56,220,232
1782 .byte 102,15,56,220,240
1783 pxor %xmm14,%xmm15
1784 movaps %xmm13,%xmm14
1785 .byte 102,15,56,220,248
1787 movdqa %xmm9,%xmm0
1788 paddd %xmm9,%xmm9
1789 .byte 102,15,56,220,209
1790 pxor %xmm15,%xmm13
1791 psrad $31,%xmm0
1792 .byte 102,15,56,220,217
1793 paddq %xmm15,%xmm15
1794 pand %xmm8,%xmm0
1795 .byte 102,15,56,220,225
1796 .byte 102,15,56,220,233
1797 pxor %xmm0,%xmm15
1798 movups (%r11),%xmm0
1799 .byte 102,15,56,220,241
1800 .byte 102,15,56,220,249
1801 movups 16(%r11),%xmm1
# aesenclast with memory operands: 102,15,56,221,84,36,N = aesenclast
# N(%rsp),%xmm2 etc. — folds the pre-whitened tweaks saved on the stack.
1803 pxor %xmm15,%xmm14
1804 .byte 102,15,56,221,84,36,0
1805 psrad $31,%xmm9
1806 paddq %xmm15,%xmm15
1807 .byte 102,15,56,221,92,36,16
1808 .byte 102,15,56,221,100,36,32
1809 pand %xmm8,%xmm9
1810 movq %r10,%rax
1811 .byte 102,15,56,221,108,36,48
1812 .byte 102,15,56,221,116,36,64
1813 .byte 102,15,56,221,124,36,80
1814 pxor %xmm9,%xmm15
1816 leaq 96(%rsi),%rsi
1817 movups %xmm2,-96(%rsi)
1818 movups %xmm3,-80(%rsi)
1819 movups %xmm4,-64(%rsi)
1820 movups %xmm5,-48(%rsi)
1821 movups %xmm6,-32(%rsi)
1822 movups %xmm7,-16(%rsi)
1823 subq $96,%rdx
1824 jnc .Lxts_enc_grandloop
# Restore rounds count for the tail; %rcx back to key schedule base.
1826 movl $16+96,%eax
1827 subl %r10d,%eax
1828 movq %r11,%rcx
1829 shrl $4,%eax
# Tail: 0..5 whole 16-byte blocks remain (%rdx restored below); dispatch
# on block count.  The pxor lines un-whiten the precomputed tweaks.
1831 .Lxts_enc_short:
1833 movl %eax,%r10d
1834 pxor %xmm0,%xmm10
1835 addq $96,%rdx
1836 jz .Lxts_enc_done
1838 pxor %xmm0,%xmm11
1839 cmpq $0x20,%rdx
1840 jb .Lxts_enc_one
1841 pxor %xmm0,%xmm12
1842 je .Lxts_enc_two
1844 pxor %xmm0,%xmm13
1845 cmpq $0x40,%rdx
1846 jb .Lxts_enc_three
1847 pxor %xmm0,%xmm14
1848 je .Lxts_enc_four
# 5 blocks: use the 6-wide primitive with a zeroed 6th input.
1850 movdqu (%rdi),%xmm2
1851 movdqu 16(%rdi),%xmm3
1852 movdqu 32(%rdi),%xmm4
1853 pxor %xmm10,%xmm2
1854 movdqu 48(%rdi),%xmm5
1855 pxor %xmm11,%xmm3
1856 movdqu 64(%rdi),%xmm6
1857 leaq 80(%rdi),%rdi
1858 pxor %xmm12,%xmm4
1859 pxor %xmm13,%xmm5
1860 pxor %xmm14,%xmm6
1861 pxor %xmm7,%xmm7
1863 call _aesni_encrypt6
1865 xorps %xmm10,%xmm2
1866 movdqa %xmm15,%xmm10
1867 xorps %xmm11,%xmm3
1868 xorps %xmm12,%xmm4
1869 movdqu %xmm2,(%rsi)
1870 xorps %xmm13,%xmm5
1871 movdqu %xmm3,16(%rsi)
1872 xorps %xmm14,%xmm6
1873 movdqu %xmm4,32(%rsi)
1874 movdqu %xmm5,48(%rsi)
1875 movdqu %xmm6,64(%rsi)
1876 leaq 80(%rsi),%rsi
1877 jmp .Lxts_enc_done
1879 .align 16
# 1 block: inline single-block AES loop (same shape as .Loop_enc1_8).
1880 .Lxts_enc_one:
1881 movups (%rdi),%xmm2
1882 leaq 16(%rdi),%rdi
1883 xorps %xmm10,%xmm2
1884 movups (%rcx),%xmm0
1885 movups 16(%rcx),%xmm1
1886 leaq 32(%rcx),%rcx
1887 xorps %xmm0,%xmm2
1888 .Loop_enc1_9:
1889 .byte 102,15,56,220,209
1890 decl %eax
1891 movups (%rcx),%xmm1
1892 leaq 16(%rcx),%rcx
1893 jnz .Loop_enc1_9
1894 .byte 102,15,56,221,209
1895 xorps %xmm10,%xmm2
1896 movdqa %xmm11,%xmm10
1897 movups %xmm2,(%rsi)
1898 leaq 16(%rsi),%rsi
1899 jmp .Lxts_enc_done
1901 .align 16
1902 .Lxts_enc_two:
1903 movups (%rdi),%xmm2
1904 movups 16(%rdi),%xmm3
1905 leaq 32(%rdi),%rdi
1906 xorps %xmm10,%xmm2
1907 xorps %xmm11,%xmm3
1909 call _aesni_encrypt2
1911 xorps %xmm10,%xmm2
1912 movdqa %xmm12,%xmm10
1913 xorps %xmm11,%xmm3
1914 movups %xmm2,(%rsi)
1915 movups %xmm3,16(%rsi)
1916 leaq 32(%rsi),%rsi
1917 jmp .Lxts_enc_done
1919 .align 16
1920 .Lxts_enc_three:
1921 movups (%rdi),%xmm2
1922 movups 16(%rdi),%xmm3
1923 movups 32(%rdi),%xmm4
1924 leaq 48(%rdi),%rdi
1925 xorps %xmm10,%xmm2
1926 xorps %xmm11,%xmm3
1927 xorps %xmm12,%xmm4
1929 call _aesni_encrypt3
1931 xorps %xmm10,%xmm2
1932 movdqa %xmm13,%xmm10
1933 xorps %xmm11,%xmm3
1934 xorps %xmm12,%xmm4
1935 movups %xmm2,(%rsi)
1936 movups %xmm3,16(%rsi)
1937 movups %xmm4,32(%rsi)
1938 leaq 48(%rsi),%rsi
1939 jmp .Lxts_enc_done
1941 .align 16
1942 .Lxts_enc_four:
1943 movups (%rdi),%xmm2
1944 movups 16(%rdi),%xmm3
1945 movups 32(%rdi),%xmm4
1946 xorps %xmm10,%xmm2
1947 movups 48(%rdi),%xmm5
1948 leaq 64(%rdi),%rdi
1949 xorps %xmm11,%xmm3
1950 xorps %xmm12,%xmm4
1951 xorps %xmm13,%xmm5
1953 call _aesni_encrypt4
1955 pxor %xmm10,%xmm2
1956 movdqa %xmm14,%xmm10
1957 pxor %xmm11,%xmm3
1958 pxor %xmm12,%xmm4
1959 movdqu %xmm2,(%rsi)
1960 pxor %xmm13,%xmm5
1961 movdqu %xmm3,16(%rsi)
1962 movdqu %xmm4,32(%rsi)
1963 movdqu %xmm5,48(%rsi)
1964 leaq 64(%rsi),%rsi
1965 jmp .Lxts_enc_done
1967 .align 16
# Ciphertext stealing (len not a multiple of 16): swap the final partial
# block's bytes with the tail of the last full ciphertext block, then
# re-encrypt that block with the saved tweak in xmm10.
1968 .Lxts_enc_done:
1969 andq $15,%r9
1970 jz .Lxts_enc_ret
1971 movq %r9,%rdx
1973 .Lxts_enc_steal:
1974 movzbl (%rdi),%eax
1975 movzbl -16(%rsi),%ecx
1976 leaq 1(%rdi),%rdi
1977 movb %al,-16(%rsi)
1978 movb %cl,0(%rsi)
1979 leaq 1(%rsi),%rsi
1980 subq $1,%rdx
1981 jnz .Lxts_enc_steal
1983 subq %r9,%rsi
1984 movq %r11,%rcx
1985 movl %r10d,%eax
1987 movups -16(%rsi),%xmm2
1988 xorps %xmm10,%xmm2
1989 movups (%rcx),%xmm0
1990 movups 16(%rcx),%xmm1
1991 leaq 32(%rcx),%rcx
1992 xorps %xmm0,%xmm2
1993 .Loop_enc1_10:
1994 .byte 102,15,56,220,209
1995 decl %eax
1996 movups (%rcx),%xmm1
1997 leaq 16(%rcx),%rcx
1998 jnz .Loop_enc1_10
1999 .byte 102,15,56,221,209
2000 xorps %xmm10,%xmm2
2001 movups %xmm2,-16(%rsi)
# Scrub all xmm registers and the stack scratch (key/tweak material)
# before returning — hygiene against leaking secrets to the caller.
2003 .Lxts_enc_ret:
2004 xorps %xmm0,%xmm0
2005 pxor %xmm1,%xmm1
2006 pxor %xmm2,%xmm2
2007 pxor %xmm3,%xmm3
2008 pxor %xmm4,%xmm4
2009 pxor %xmm5,%xmm5
2010 pxor %xmm6,%xmm6
2011 pxor %xmm7,%xmm7
2012 movaps %xmm0,0(%rsp)
2013 pxor %xmm8,%xmm8
2014 movaps %xmm0,16(%rsp)
2015 pxor %xmm9,%xmm9
2016 movaps %xmm0,32(%rsp)
2017 pxor %xmm10,%xmm10
2018 movaps %xmm0,48(%rsp)
2019 pxor %xmm11,%xmm11
2020 movaps %xmm0,64(%rsp)
2021 pxor %xmm12,%xmm12
2022 movaps %xmm0,80(%rsp)
2023 pxor %xmm13,%xmm13
2024 movaps %xmm0,96(%rsp)
2025 pxor %xmm14,%xmm14
2026 pxor %xmm15,%xmm15
2027 leaq (%rbp),%rsp
2028 popq %rbp
2029 .Lxts_enc_epilogue:
2030 .byte 0xf3,0xc3
2031 .size aesni_xts_encrypt,.-aesni_xts_encrypt
# ---------------------------------------------------------------------------
# aesni_xts_decrypt — AES-XTS decryption using AES-NI (generated code;
# AT&T syntax, SysV AMD64 ABI).  Mirror of aesni_xts_encrypt with
# aesdec/aesdeclast (.byte 102,15,56,222/223,x = 66 0F 38 DE / DF); the
# tweak itself is still ENcrypted with key2 (XTS spec).
#
# C-equivalent signature (OpenSSL convention):
#   void aesni_xts_decrypt(const unsigned char *inp,  /* %rdi */
#                          unsigned char *out,         /* %rsi */
#                          size_t len,                 /* %rdx */
#                          const AES_KEY *key1,        /* %rcx  data key  */
#                          const AES_KEY *key2,        /* %r8   tweak key */
#                          const unsigned char iv[16]) /* %r9   initial tweak */
#
# NOTE(review): the leading decimal on each line is extraction residue
# from the blob view, not part of the instruction text.
2032 .globl aesni_xts_decrypt
2033 .type aesni_xts_decrypt,@function
2034 .align 16
2035 aesni_xts_decrypt:
# Prologue: 112-byte aligned scratch frame; %rbp anchors caller %rsp-8.
2036 leaq (%rsp),%rax
2037 pushq %rbp
2038 subq $112,%rsp
2039 andq $-16,%rsp
2040 leaq -8(%rax),%rbp
# Encrypt the tweak with key2 (AES forward direction, per XTS).
2041 movups (%r9),%xmm2
2042 movl 240(%r8),%eax
2043 movl 240(%rcx),%r10d
2044 movups (%r8),%xmm0
2045 movups 16(%r8),%xmm1
2046 leaq 32(%r8),%r8
2047 xorps %xmm0,%xmm2
2048 .Loop_enc1_11:
2049 .byte 102,15,56,220,209
2050 decl %eax
2051 movups (%r8),%xmm1
2052 leaq 16(%r8),%r8
2053 jnz .Loop_enc1_11
2054 .byte 102,15,56,221,209
# Decrypt-only adjustment: if len is not a multiple of 16, hold back one
# extra full block (16 bytes) so ciphertext stealing can run at the end.
2055 xorl %eax,%eax
2056 testq $15,%rdx
2057 setnz %al
2058 shlq $4,%rax
2059 subq %rax,%rdx
2061 movups (%rcx),%xmm0
2062 movq %rcx,%r11
2063 movl %r10d,%eax
2064 shll $4,%r10d
2065 movq %rdx,%r9
2066 andq $-16,%rdx
2068 movups 16(%rcx,%r10,1),%xmm1
# Derive 6 tweaks (xmm10..xmm14, next in xmm15) by GF(2^128) doubling;
# .Lxts_magic supplies the reduction constant, xmm9 the carry mask.
2070 movdqa .Lxts_magic(%rip),%xmm8
2071 movdqa %xmm2,%xmm15
2072 pshufd $0x5f,%xmm2,%xmm9
2073 pxor %xmm0,%xmm1
2074 movdqa %xmm9,%xmm14
2075 paddd %xmm9,%xmm9
2076 movdqa %xmm15,%xmm10
2077 psrad $31,%xmm14
2078 paddq %xmm15,%xmm15
2079 pand %xmm8,%xmm14
2080 pxor %xmm0,%xmm10
2081 pxor %xmm14,%xmm15
2082 movdqa %xmm9,%xmm14
2083 paddd %xmm9,%xmm9
2084 movdqa %xmm15,%xmm11
2085 psrad $31,%xmm14
2086 paddq %xmm15,%xmm15
2087 pand %xmm8,%xmm14
2088 pxor %xmm0,%xmm11
2089 pxor %xmm14,%xmm15
2090 movdqa %xmm9,%xmm14
2091 paddd %xmm9,%xmm9
2092 movdqa %xmm15,%xmm12
2093 psrad $31,%xmm14
2094 paddq %xmm15,%xmm15
2095 pand %xmm8,%xmm14
2096 pxor %xmm0,%xmm12
2097 pxor %xmm14,%xmm15
2098 movdqa %xmm9,%xmm14
2099 paddd %xmm9,%xmm9
2100 movdqa %xmm15,%xmm13
2101 psrad $31,%xmm14
2102 paddq %xmm15,%xmm15
2103 pand %xmm8,%xmm14
2104 pxor %xmm0,%xmm13
2105 pxor %xmm14,%xmm15
2106 movdqa %xmm15,%xmm14
2107 psrad $31,%xmm9
2108 paddq %xmm15,%xmm15
2109 pand %xmm8,%xmm9
2110 pxor %xmm0,%xmm14
2111 pxor %xmm9,%xmm15
2112 movaps %xmm1,96(%rsp)
# Fewer than 6 whole blocks? take the short path.
2114 subq $96,%rdx
2115 jc .Lxts_dec_short
2117 movl $16+96,%eax
2118 leaq 32(%r11,%r10,1),%rcx
2119 subq %r10,%rax
2120 movups 16(%r11),%xmm1
2121 movq %rax,%r10
2122 leaq .Lxts_magic(%rip),%r8
2123 jmp .Lxts_dec_grandloop
2125 .align 32
# Main loop: decrypt 6 blocks (96 bytes) per iteration, interleaving AES
# rounds with computation of the next 6 tweaks (spilled to 0..80(%rsp)).
2126 .Lxts_dec_grandloop:
2127 movdqu 0(%rdi),%xmm2
2128 movdqa %xmm0,%xmm8
2129 movdqu 16(%rdi),%xmm3
2130 pxor %xmm10,%xmm2
2131 movdqu 32(%rdi),%xmm4
2132 pxor %xmm11,%xmm3
2133 .byte 102,15,56,222,209
2134 movdqu 48(%rdi),%xmm5
2135 pxor %xmm12,%xmm4
2136 .byte 102,15,56,222,217
2137 movdqu 64(%rdi),%xmm6
2138 pxor %xmm13,%xmm5
2139 .byte 102,15,56,222,225
2140 movdqu 80(%rdi),%xmm7
2141 pxor %xmm15,%xmm8
2142 movdqa 96(%rsp),%xmm9
2143 pxor %xmm14,%xmm6
2144 .byte 102,15,56,222,233
2145 movups 32(%r11),%xmm0
2146 leaq 96(%rdi),%rdi
2147 pxor %xmm8,%xmm7
# Pre-whiten the saved tweaks with the last round key for the final
# aesdeclast-from-memory step.
2149 pxor %xmm9,%xmm10
2150 .byte 102,15,56,222,241
2151 pxor %xmm9,%xmm11
2152 movdqa %xmm10,0(%rsp)
2153 .byte 102,15,56,222,249
2154 movups 48(%r11),%xmm1
2155 pxor %xmm9,%xmm12
2157 .byte 102,15,56,222,208
2158 pxor %xmm9,%xmm13
2159 movdqa %xmm11,16(%rsp)
2160 .byte 102,15,56,222,216
2161 pxor %xmm9,%xmm14
2162 movdqa %xmm12,32(%rsp)
2163 .byte 102,15,56,222,224
2164 .byte 102,15,56,222,232
2165 pxor %xmm9,%xmm8
2166 movdqa %xmm14,64(%rsp)
2167 .byte 102,15,56,222,240
2168 .byte 102,15,56,222,248
2169 movups 64(%r11),%xmm0
2170 movdqa %xmm8,80(%rsp)
2171 pshufd $0x5f,%xmm15,%xmm9
2172 jmp .Lxts_dec_loop6
2173 .align 32
# Inner round loop over the middle of the key schedule (%rax counts up).
2174 .Lxts_dec_loop6:
2175 .byte 102,15,56,222,209
2176 .byte 102,15,56,222,217
2177 .byte 102,15,56,222,225
2178 .byte 102,15,56,222,233
2179 .byte 102,15,56,222,241
2180 .byte 102,15,56,222,249
2181 movups -64(%rcx,%rax,1),%xmm1
2182 addq $32,%rax
2184 .byte 102,15,56,222,208
2185 .byte 102,15,56,222,216
2186 .byte 102,15,56,222,224
2187 .byte 102,15,56,222,232
2188 .byte 102,15,56,222,240
2189 .byte 102,15,56,222,248
2190 movups -80(%rcx,%rax,1),%xmm0
2191 jnz .Lxts_dec_loop6
# Final rounds fused with derivation of the next iteration's tweaks.
2193 movdqa (%r8),%xmm8
2194 movdqa %xmm9,%xmm14
2195 paddd %xmm9,%xmm9
2196 .byte 102,15,56,222,209
2197 paddq %xmm15,%xmm15
2198 psrad $31,%xmm14
2199 .byte 102,15,56,222,217
2200 pand %xmm8,%xmm14
2201 movups (%r11),%xmm10
2202 .byte 102,15,56,222,225
2203 .byte 102,15,56,222,233
2204 .byte 102,15,56,222,241
2205 pxor %xmm14,%xmm15
2206 movaps %xmm10,%xmm11
2207 .byte 102,15,56,222,249
2208 movups -64(%rcx),%xmm1
2210 movdqa %xmm9,%xmm14
2211 .byte 102,15,56,222,208
2212 paddd %xmm9,%xmm9
2213 pxor %xmm15,%xmm10
2214 .byte 102,15,56,222,216
2215 psrad $31,%xmm14
2216 paddq %xmm15,%xmm15
2217 .byte 102,15,56,222,224
2218 .byte 102,15,56,222,232
2219 pand %xmm8,%xmm14
2220 movaps %xmm11,%xmm12
2221 .byte 102,15,56,222,240
2222 pxor %xmm14,%xmm15
2223 movdqa %xmm9,%xmm14
2224 .byte 102,15,56,222,248
2225 movups -48(%rcx),%xmm0
2227 paddd %xmm9,%xmm9
2228 .byte 102,15,56,222,209
2229 pxor %xmm15,%xmm11
2230 psrad $31,%xmm14
2231 .byte 102,15,56,222,217
2232 paddq %xmm15,%xmm15
2233 pand %xmm8,%xmm14
2234 .byte 102,15,56,222,225
2235 .byte 102,15,56,222,233
2236 movdqa %xmm13,48(%rsp)
2237 pxor %xmm14,%xmm15
2238 .byte 102,15,56,222,241
2239 movaps %xmm12,%xmm13
2240 movdqa %xmm9,%xmm14
2241 .byte 102,15,56,222,249
2242 movups -32(%rcx),%xmm1
2244 paddd %xmm9,%xmm9
2245 .byte 102,15,56,222,208
2246 pxor %xmm15,%xmm12
2247 psrad $31,%xmm14
2248 .byte 102,15,56,222,216
2249 paddq %xmm15,%xmm15
2250 pand %xmm8,%xmm14
2251 .byte 102,15,56,222,224
2252 .byte 102,15,56,222,232
2253 .byte 102,15,56,222,240
2254 pxor %xmm14,%xmm15
2255 movaps %xmm13,%xmm14
2256 .byte 102,15,56,222,248
2258 movdqa %xmm9,%xmm0
2259 paddd %xmm9,%xmm9
2260 .byte 102,15,56,222,209
2261 pxor %xmm15,%xmm13
2262 psrad $31,%xmm0
2263 .byte 102,15,56,222,217
2264 paddq %xmm15,%xmm15
2265 pand %xmm8,%xmm0
2266 .byte 102,15,56,222,225
2267 .byte 102,15,56,222,233
2268 pxor %xmm0,%xmm15
2269 movups (%r11),%xmm0
2270 .byte 102,15,56,222,241
2271 .byte 102,15,56,222,249
2272 movups 16(%r11),%xmm1
# aesdeclast with memory operands: 102,15,56,223,84,36,N = aesdeclast
# N(%rsp),%xmm2 etc. — folds the pre-whitened tweaks from the stack.
2274 pxor %xmm15,%xmm14
2275 .byte 102,15,56,223,84,36,0
2276 psrad $31,%xmm9
2277 paddq %xmm15,%xmm15
2278 .byte 102,15,56,223,92,36,16
2279 .byte 102,15,56,223,100,36,32
2280 pand %xmm8,%xmm9
2281 movq %r10,%rax
2282 .byte 102,15,56,223,108,36,48
2283 .byte 102,15,56,223,116,36,64
2284 .byte 102,15,56,223,124,36,80
2285 pxor %xmm9,%xmm15
2287 leaq 96(%rsi),%rsi
2288 movups %xmm2,-96(%rsi)
2289 movups %xmm3,-80(%rsi)
2290 movups %xmm4,-64(%rsi)
2291 movups %xmm5,-48(%rsi)
2292 movups %xmm6,-32(%rsi)
2293 movups %xmm7,-16(%rsi)
2294 subq $96,%rdx
2295 jnc .Lxts_dec_grandloop
2297 movl $16+96,%eax
2298 subl %r10d,%eax
2299 movq %r11,%rcx
2300 shrl $4,%eax
# Tail: 0..5 whole blocks remain; un-whiten tweaks and dispatch on count.
2302 .Lxts_dec_short:
2304 movl %eax,%r10d
2305 pxor %xmm0,%xmm10
2306 pxor %xmm0,%xmm11
2307 addq $96,%rdx
2308 jz .Lxts_dec_done
2310 pxor %xmm0,%xmm12
2311 cmpq $0x20,%rdx
2312 jb .Lxts_dec_one
2313 pxor %xmm0,%xmm13
2314 je .Lxts_dec_two
2316 pxor %xmm0,%xmm14
2317 cmpq $0x40,%rdx
2318 jb .Lxts_dec_three
2319 je .Lxts_dec_four
# 5 blocks via the 6-wide primitive; afterwards compute one more tweak
# inline (pcmpgtd/pshufd/pand/pxor) for the stealing block if needed.
2321 movdqu (%rdi),%xmm2
2322 movdqu 16(%rdi),%xmm3
2323 movdqu 32(%rdi),%xmm4
2324 pxor %xmm10,%xmm2
2325 movdqu 48(%rdi),%xmm5
2326 pxor %xmm11,%xmm3
2327 movdqu 64(%rdi),%xmm6
2328 leaq 80(%rdi),%rdi
2329 pxor %xmm12,%xmm4
2330 pxor %xmm13,%xmm5
2331 pxor %xmm14,%xmm6
2333 call _aesni_decrypt6
2335 xorps %xmm10,%xmm2
2336 xorps %xmm11,%xmm3
2337 xorps %xmm12,%xmm4
2338 movdqu %xmm2,(%rsi)
2339 xorps %xmm13,%xmm5
2340 movdqu %xmm3,16(%rsi)
2341 xorps %xmm14,%xmm6
2342 movdqu %xmm4,32(%rsi)
2343 pxor %xmm14,%xmm14
2344 movdqu %xmm5,48(%rsi)
2345 pcmpgtd %xmm15,%xmm14
2346 movdqu %xmm6,64(%rsi)
2347 leaq 80(%rsi),%rsi
2348 pshufd $0x13,%xmm14,%xmm11
2349 andq $15,%r9
2350 jz .Lxts_dec_ret
2352 movdqa %xmm15,%xmm10
2353 paddq %xmm15,%xmm15
2354 pand %xmm8,%xmm11
2355 pxor %xmm15,%xmm11
2356 jmp .Lxts_dec_done2
2358 .align 16
# 1 block: inline single-block AES decrypt loop; rotate tweaks so xmm10
# holds the tweak for the held-back stealing block, xmm11 the next one.
2359 .Lxts_dec_one:
2360 movups (%rdi),%xmm2
2361 leaq 16(%rdi),%rdi
2362 xorps %xmm10,%xmm2
2363 movups (%rcx),%xmm0
2364 movups 16(%rcx),%xmm1
2365 leaq 32(%rcx),%rcx
2366 xorps %xmm0,%xmm2
2367 .Loop_dec1_12:
2368 .byte 102,15,56,222,209
2369 decl %eax
2370 movups (%rcx),%xmm1
2371 leaq 16(%rcx),%rcx
2372 jnz .Loop_dec1_12
2373 .byte 102,15,56,223,209
2374 xorps %xmm10,%xmm2
2375 movdqa %xmm11,%xmm10
2376 movups %xmm2,(%rsi)
2377 movdqa %xmm12,%xmm11
2378 leaq 16(%rsi),%rsi
2379 jmp .Lxts_dec_done
2381 .align 16
2382 .Lxts_dec_two:
2383 movups (%rdi),%xmm2
2384 movups 16(%rdi),%xmm3
2385 leaq 32(%rdi),%rdi
2386 xorps %xmm10,%xmm2
2387 xorps %xmm11,%xmm3
2389 call _aesni_decrypt2
2391 xorps %xmm10,%xmm2
2392 movdqa %xmm12,%xmm10
2393 xorps %xmm11,%xmm3
2394 movdqa %xmm13,%xmm11
2395 movups %xmm2,(%rsi)
2396 movups %xmm3,16(%rsi)
2397 leaq 32(%rsi),%rsi
2398 jmp .Lxts_dec_done
2400 .align 16
2401 .Lxts_dec_three:
2402 movups (%rdi),%xmm2
2403 movups 16(%rdi),%xmm3
2404 movups 32(%rdi),%xmm4
2405 leaq 48(%rdi),%rdi
2406 xorps %xmm10,%xmm2
2407 xorps %xmm11,%xmm3
2408 xorps %xmm12,%xmm4
2410 call _aesni_decrypt3
2412 xorps %xmm10,%xmm2
2413 movdqa %xmm13,%xmm10
2414 xorps %xmm11,%xmm3
2415 movdqa %xmm14,%xmm11
2416 xorps %xmm12,%xmm4
2417 movups %xmm2,(%rsi)
2418 movups %xmm3,16(%rsi)
2419 movups %xmm4,32(%rsi)
2420 leaq 48(%rsi),%rsi
2421 jmp .Lxts_dec_done
2423 .align 16
2424 .Lxts_dec_four:
2425 movups (%rdi),%xmm2
2426 movups 16(%rdi),%xmm3
2427 movups 32(%rdi),%xmm4
2428 xorps %xmm10,%xmm2
2429 movups 48(%rdi),%xmm5
2430 leaq 64(%rdi),%rdi
2431 xorps %xmm11,%xmm3
2432 xorps %xmm12,%xmm4
2433 xorps %xmm13,%xmm5
2435 call _aesni_decrypt4
2437 pxor %xmm10,%xmm2
2438 movdqa %xmm14,%xmm10
2439 pxor %xmm11,%xmm3
2440 movdqa %xmm15,%xmm11
2441 pxor %xmm12,%xmm4
2442 movdqu %xmm2,(%rsi)
2443 pxor %xmm13,%xmm5
2444 movdqu %xmm3,16(%rsi)
2445 movdqu %xmm4,32(%rsi)
2446 movdqu %xmm5,48(%rsi)
2447 leaq 64(%rsi),%rsi
2448 jmp .Lxts_dec_done
2450 .align 16
# Ciphertext stealing, decrypt side: first decrypt the held-back block
# with the NEXT tweak (xmm11), then swap bytes with the partial block
# and re-decrypt with the current tweak (xmm10).
2451 .Lxts_dec_done:
2452 andq $15,%r9
2453 jz .Lxts_dec_ret
2454 .Lxts_dec_done2:
2455 movq %r9,%rdx
2456 movq %r11,%rcx
2457 movl %r10d,%eax
2459 movups (%rdi),%xmm2
2460 xorps %xmm11,%xmm2
2461 movups (%rcx),%xmm0
2462 movups 16(%rcx),%xmm1
2463 leaq 32(%rcx),%rcx
2464 xorps %xmm0,%xmm2
2465 .Loop_dec1_13:
2466 .byte 102,15,56,222,209
2467 decl %eax
2468 movups (%rcx),%xmm1
2469 leaq 16(%rcx),%rcx
2470 jnz .Loop_dec1_13
2471 .byte 102,15,56,223,209
2472 xorps %xmm11,%xmm2
2473 movups %xmm2,(%rsi)
2475 .Lxts_dec_steal:
2476 movzbl 16(%rdi),%eax
2477 movzbl (%rsi),%ecx
2478 leaq 1(%rdi),%rdi
2479 movb %al,(%rsi)
2480 movb %cl,16(%rsi)
2481 leaq 1(%rsi),%rsi
2482 subq $1,%rdx
2483 jnz .Lxts_dec_steal
2485 subq %r9,%rsi
2486 movq %r11,%rcx
2487 movl %r10d,%eax
2489 movups (%rsi),%xmm2
2490 xorps %xmm10,%xmm2
2491 movups (%rcx),%xmm0
2492 movups 16(%rcx),%xmm1
2493 leaq 32(%rcx),%rcx
2494 xorps %xmm0,%xmm2
2495 .Loop_dec1_14:
2496 .byte 102,15,56,222,209
2497 decl %eax
2498 movups (%rcx),%xmm1
2499 leaq 16(%rcx),%rcx
2500 jnz .Loop_dec1_14
2501 .byte 102,15,56,223,209
2502 xorps %xmm10,%xmm2
2503 movups %xmm2,(%rsi)
# Scrub all xmm registers and the stack scratch (key/tweak material)
# before returning.
2505 .Lxts_dec_ret:
2506 xorps %xmm0,%xmm0
2507 pxor %xmm1,%xmm1
2508 pxor %xmm2,%xmm2
2509 pxor %xmm3,%xmm3
2510 pxor %xmm4,%xmm4
2511 pxor %xmm5,%xmm5
2512 pxor %xmm6,%xmm6
2513 pxor %xmm7,%xmm7
2514 movaps %xmm0,0(%rsp)
2515 pxor %xmm8,%xmm8
2516 movaps %xmm0,16(%rsp)
2517 pxor %xmm9,%xmm9
2518 movaps %xmm0,32(%rsp)
2519 pxor %xmm10,%xmm10
2520 movaps %xmm0,48(%rsp)
2521 pxor %xmm11,%xmm11
2522 movaps %xmm0,64(%rsp)
2523 pxor %xmm12,%xmm12
2524 movaps %xmm0,80(%rsp)
2525 pxor %xmm13,%xmm13
2526 movaps %xmm0,96(%rsp)
2527 pxor %xmm14,%xmm14
2528 pxor %xmm15,%xmm15
2529 leaq (%rbp),%rsp
2530 popq %rbp
2531 .Lxts_dec_epilogue:
2532 .byte 0xf3,0xc3
2533 .size aesni_xts_decrypt,.-aesni_xts_decrypt
2534 .globl aesni_cbc_encrypt
2535 .type aesni_cbc_encrypt,@function
2536 .align 16
2537 aesni_cbc_encrypt:
2538 testq %rdx,%rdx
2539 jz .Lcbc_ret
2541 movl 240(%rcx),%r10d
2542 movq %rcx,%r11
2543 testl %r9d,%r9d
2544 jz .Lcbc_decrypt
2546 movups (%r8),%xmm2
2547 movl %r10d,%eax
2548 cmpq $16,%rdx
2549 jb .Lcbc_enc_tail
2550 subq $16,%rdx
2551 jmp .Lcbc_enc_loop
2552 .align 16
2553 .Lcbc_enc_loop:
2554 movups (%rdi),%xmm3
2555 leaq 16(%rdi),%rdi
2557 movups (%rcx),%xmm0
2558 movups 16(%rcx),%xmm1
2559 xorps %xmm0,%xmm3
2560 leaq 32(%rcx),%rcx
2561 xorps %xmm3,%xmm2
2562 .Loop_enc1_15:
2563 .byte 102,15,56,220,209
2564 decl %eax
2565 movups (%rcx),%xmm1
2566 leaq 16(%rcx),%rcx
2567 jnz .Loop_enc1_15
2568 .byte 102,15,56,221,209
2569 movl %r10d,%eax
2570 movq %r11,%rcx
2571 movups %xmm2,0(%rsi)
2572 leaq 16(%rsi),%rsi
2573 subq $16,%rdx
2574 jnc .Lcbc_enc_loop
2575 addq $16,%rdx
2576 jnz .Lcbc_enc_tail
2577 pxor %xmm0,%xmm0
2578 pxor %xmm1,%xmm1
2579 movups %xmm2,(%r8)
2580 pxor %xmm2,%xmm2
2581 pxor %xmm3,%xmm3
2582 jmp .Lcbc_ret
2584 .Lcbc_enc_tail:
2585 movq %rdx,%rcx
2586 xchgq %rdi,%rsi
2587 .long 0x9066A4F3
2588 movl $16,%ecx
2589 subq %rdx,%rcx
2590 xorl %eax,%eax
2591 .long 0x9066AAF3
2592 leaq -16(%rdi),%rdi
2593 movl %r10d,%eax
2594 movq %rdi,%rsi
2595 movq %r11,%rcx
2596 xorq %rdx,%rdx
2597 jmp .Lcbc_enc_loop
2599 .align 16
2600 .Lcbc_decrypt:
2601 cmpq $16,%rdx
2602 jne .Lcbc_decrypt_bulk
2606 movdqu (%rdi),%xmm2
2607 movdqu (%r8),%xmm3
2608 movdqa %xmm2,%xmm4
2609 movups (%rcx),%xmm0
2610 movups 16(%rcx),%xmm1
2611 leaq 32(%rcx),%rcx
2612 xorps %xmm0,%xmm2
2613 .Loop_dec1_16:
2614 .byte 102,15,56,222,209
2615 decl %r10d
2616 movups (%rcx),%xmm1
2617 leaq 16(%rcx),%rcx
2618 jnz .Loop_dec1_16
2619 .byte 102,15,56,223,209
2620 pxor %xmm0,%xmm0
2621 pxor %xmm1,%xmm1
2622 movdqu %xmm4,(%r8)
2623 xorps %xmm3,%xmm2
2624 pxor %xmm3,%xmm3
2625 movups %xmm2,(%rsi)
2626 pxor %xmm2,%xmm2
2627 jmp .Lcbc_ret
2628 .align 16
2629 .Lcbc_decrypt_bulk:
2630 leaq (%rsp),%rax
2631 pushq %rbp
2632 subq $16,%rsp
2633 andq $-16,%rsp
2634 leaq -8(%rax),%rbp
2635 movups (%r8),%xmm10
2636 movl %r10d,%eax
2637 cmpq $0x50,%rdx
2638 jbe .Lcbc_dec_tail
2640 movups (%rcx),%xmm0
2641 movdqu 0(%rdi),%xmm2
2642 movdqu 16(%rdi),%xmm3
2643 movdqa %xmm2,%xmm11
2644 movdqu 32(%rdi),%xmm4
2645 movdqa %xmm3,%xmm12
2646 movdqu 48(%rdi),%xmm5
2647 movdqa %xmm4,%xmm13
2648 movdqu 64(%rdi),%xmm6
2649 movdqa %xmm5,%xmm14
2650 movdqu 80(%rdi),%xmm7
2651 movdqa %xmm6,%xmm15
2652 movl OPENSSL_ia32cap_P+4(%rip),%r9d
2653 cmpq $0x70,%rdx
2654 jbe .Lcbc_dec_six_or_seven
2656 andl $71303168,%r9d
2657 subq $0x50,%rdx
2658 cmpl $4194304,%r9d
2659 je .Lcbc_dec_loop6_enter
2660 subq $0x20,%rdx
2661 leaq 112(%rcx),%rcx
2662 jmp .Lcbc_dec_loop8_enter
2663 .align 16
2664 .Lcbc_dec_loop8:
2665 movups %xmm9,(%rsi)
2666 leaq 16(%rsi),%rsi
2667 .Lcbc_dec_loop8_enter:
2668 movdqu 96(%rdi),%xmm8
2669 pxor %xmm0,%xmm2
2670 movdqu 112(%rdi),%xmm9
2671 pxor %xmm0,%xmm3
2672 movups 16-112(%rcx),%xmm1
2673 pxor %xmm0,%xmm4
2674 xorq %r11,%r11
2675 cmpq $0x70,%rdx
2676 pxor %xmm0,%xmm5
2677 pxor %xmm0,%xmm6
2678 pxor %xmm0,%xmm7
2679 pxor %xmm0,%xmm8
2681 .byte 102,15,56,222,209
2682 pxor %xmm0,%xmm9
2683 movups 32-112(%rcx),%xmm0
2684 .byte 102,15,56,222,217
2685 .byte 102,15,56,222,225
2686 .byte 102,15,56,222,233
2687 .byte 102,15,56,222,241
2688 .byte 102,15,56,222,249
2689 .byte 102,68,15,56,222,193
2690 setnc %r11b
2691 shlq $7,%r11
2692 .byte 102,68,15,56,222,201
2693 addq %rdi,%r11
2694 movups 48-112(%rcx),%xmm1
2695 .byte 102,15,56,222,208
2696 .byte 102,15,56,222,216
2697 .byte 102,15,56,222,224
2698 .byte 102,15,56,222,232
2699 .byte 102,15,56,222,240
2700 .byte 102,15,56,222,248
2701 .byte 102,68,15,56,222,192
2702 .byte 102,68,15,56,222,200
2703 movups 64-112(%rcx),%xmm0
2705 .byte 102,15,56,222,209
2706 .byte 102,15,56,222,217
2707 .byte 102,15,56,222,225
2708 .byte 102,15,56,222,233
2709 .byte 102,15,56,222,241
2710 .byte 102,15,56,222,249
2711 .byte 102,68,15,56,222,193
2712 .byte 102,68,15,56,222,201
2713 movups 80-112(%rcx),%xmm1
2715 .byte 102,15,56,222,208
2716 .byte 102,15,56,222,216
2717 .byte 102,15,56,222,224
2718 .byte 102,15,56,222,232
2719 .byte 102,15,56,222,240
2720 .byte 102,15,56,222,248
2721 .byte 102,68,15,56,222,192
2722 .byte 102,68,15,56,222,200
2723 movups 96-112(%rcx),%xmm0
2725 .byte 102,15,56,222,209
2726 .byte 102,15,56,222,217
2727 .byte 102,15,56,222,225
2728 .byte 102,15,56,222,233
2729 .byte 102,15,56,222,241
2730 .byte 102,15,56,222,249
2731 .byte 102,68,15,56,222,193
2732 .byte 102,68,15,56,222,201
2733 movups 112-112(%rcx),%xmm1
2735 .byte 102,15,56,222,208
2736 .byte 102,15,56,222,216
2737 .byte 102,15,56,222,224
2738 .byte 102,15,56,222,232
2739 .byte 102,15,56,222,240
2740 .byte 102,15,56,222,248
2741 .byte 102,68,15,56,222,192
2742 .byte 102,68,15,56,222,200
2743 movups 128-112(%rcx),%xmm0
2745 .byte 102,15,56,222,209
2746 .byte 102,15,56,222,217
2747 .byte 102,15,56,222,225
2748 .byte 102,15,56,222,233
2749 .byte 102,15,56,222,241
2750 .byte 102,15,56,222,249
2751 .byte 102,68,15,56,222,193
2752 .byte 102,68,15,56,222,201
2753 movups 144-112(%rcx),%xmm1
2754 cmpl $11,%eax
2755 .byte 102,15,56,222,208
2756 .byte 102,15,56,222,216
2757 .byte 102,15,56,222,224
2758 .byte 102,15,56,222,232
2759 .byte 102,15,56,222,240
2760 .byte 102,15,56,222,248
2761 .byte 102,68,15,56,222,192
2762 .byte 102,68,15,56,222,200
2763 movups 160-112(%rcx),%xmm0
2764 jb .Lcbc_dec_done
2765 .byte 102,15,56,222,209
2766 .byte 102,15,56,222,217
2767 .byte 102,15,56,222,225
2768 .byte 102,15,56,222,233
2769 .byte 102,15,56,222,241
2770 .byte 102,15,56,222,249
2771 .byte 102,68,15,56,222,193
2772 .byte 102,68,15,56,222,201
2773 movups 176-112(%rcx),%xmm1
2775 .byte 102,15,56,222,208
2776 .byte 102,15,56,222,216
2777 .byte 102,15,56,222,224
2778 .byte 102,15,56,222,232
2779 .byte 102,15,56,222,240
2780 .byte 102,15,56,222,248
2781 .byte 102,68,15,56,222,192
2782 .byte 102,68,15,56,222,200
2783 movups 192-112(%rcx),%xmm0
2784 je .Lcbc_dec_done
2785 .byte 102,15,56,222,209
2786 .byte 102,15,56,222,217
2787 .byte 102,15,56,222,225
2788 .byte 102,15,56,222,233
2789 .byte 102,15,56,222,241
2790 .byte 102,15,56,222,249
2791 .byte 102,68,15,56,222,193
2792 .byte 102,68,15,56,222,201
2793 movups 208-112(%rcx),%xmm1
2795 .byte 102,15,56,222,208
2796 .byte 102,15,56,222,216
2797 .byte 102,15,56,222,224
2798 .byte 102,15,56,222,232
2799 .byte 102,15,56,222,240
2800 .byte 102,15,56,222,248
2801 .byte 102,68,15,56,222,192
2802 .byte 102,68,15,56,222,200
2803 movups 224-112(%rcx),%xmm0
2804 jmp .Lcbc_dec_done
2805 .align 16
2806 .Lcbc_dec_done:
2807 .byte 102,15,56,222,209
2808 .byte 102,15,56,222,217
2809 pxor %xmm0,%xmm10
2810 pxor %xmm0,%xmm11
2811 .byte 102,15,56,222,225
2812 .byte 102,15,56,222,233
2813 pxor %xmm0,%xmm12
2814 pxor %xmm0,%xmm13
2815 .byte 102,15,56,222,241
2816 .byte 102,15,56,222,249
2817 pxor %xmm0,%xmm14
2818 pxor %xmm0,%xmm15
2819 .byte 102,68,15,56,222,193
2820 .byte 102,68,15,56,222,201
2821 movdqu 80(%rdi),%xmm1
2823 .byte 102,65,15,56,223,210
2824 movdqu 96(%rdi),%xmm10
2825 pxor %xmm0,%xmm1
2826 .byte 102,65,15,56,223,219
2827 pxor %xmm0,%xmm10
2828 movdqu 112(%rdi),%xmm0
2829 .byte 102,65,15,56,223,228
2830 leaq 128(%rdi),%rdi
2831 movdqu 0(%r11),%xmm11
2832 .byte 102,65,15,56,223,237
2833 .byte 102,65,15,56,223,246
2834 movdqu 16(%r11),%xmm12
2835 movdqu 32(%r11),%xmm13
2836 .byte 102,65,15,56,223,255
2837 .byte 102,68,15,56,223,193
2838 movdqu 48(%r11),%xmm14
2839 movdqu 64(%r11),%xmm15
2840 .byte 102,69,15,56,223,202
2841 movdqa %xmm0,%xmm10
2842 movdqu 80(%r11),%xmm1
2843 movups -112(%rcx),%xmm0
2845 movups %xmm2,(%rsi)
2846 movdqa %xmm11,%xmm2
2847 movups %xmm3,16(%rsi)
2848 movdqa %xmm12,%xmm3
2849 movups %xmm4,32(%rsi)
2850 movdqa %xmm13,%xmm4
2851 movups %xmm5,48(%rsi)
2852 movdqa %xmm14,%xmm5
2853 movups %xmm6,64(%rsi)
2854 movdqa %xmm15,%xmm6
2855 movups %xmm7,80(%rsi)
2856 movdqa %xmm1,%xmm7
2857 movups %xmm8,96(%rsi)
2858 leaq 112(%rsi),%rsi
2860 subq $0x80,%rdx
2861 ja .Lcbc_dec_loop8
2863 movaps %xmm9,%xmm2
2864 leaq -112(%rcx),%rcx
2865 addq $0x70,%rdx
2866 jle .Lcbc_dec_clear_tail_collected
2867 movups %xmm9,(%rsi)
2868 leaq 16(%rsi),%rsi
2869 cmpq $0x50,%rdx
2870 jbe .Lcbc_dec_tail
2872 movaps %xmm11,%xmm2
2873 .Lcbc_dec_six_or_seven:
2874 cmpq $0x60,%rdx
2875 ja .Lcbc_dec_seven
2877 movaps %xmm7,%xmm8
2878 call _aesni_decrypt6
2879 pxor %xmm10,%xmm2
2880 movaps %xmm8,%xmm10
2881 pxor %xmm11,%xmm3
2882 movdqu %xmm2,(%rsi)
2883 pxor %xmm12,%xmm4
2884 movdqu %xmm3,16(%rsi)
2885 pxor %xmm3,%xmm3
2886 pxor %xmm13,%xmm5
2887 movdqu %xmm4,32(%rsi)
2888 pxor %xmm4,%xmm4
2889 pxor %xmm14,%xmm6
2890 movdqu %xmm5,48(%rsi)
2891 pxor %xmm5,%xmm5
2892 pxor %xmm15,%xmm7
2893 movdqu %xmm6,64(%rsi)
2894 pxor %xmm6,%xmm6
2895 leaq 80(%rsi),%rsi
2896 movdqa %xmm7,%xmm2
2897 pxor %xmm7,%xmm7
2898 jmp .Lcbc_dec_tail_collected
2900 .align 16
2901 .Lcbc_dec_seven:
2902 movups 96(%rdi),%xmm8
2903 xorps %xmm9,%xmm9
2904 call _aesni_decrypt8
2905 movups 80(%rdi),%xmm9
2906 pxor %xmm10,%xmm2
2907 movups 96(%rdi),%xmm10
2908 pxor %xmm11,%xmm3
2909 movdqu %xmm2,(%rsi)
2910 pxor %xmm12,%xmm4
2911 movdqu %xmm3,16(%rsi)
2912 pxor %xmm3,%xmm3
2913 pxor %xmm13,%xmm5
2914 movdqu %xmm4,32(%rsi)
2915 pxor %xmm4,%xmm4
2916 pxor %xmm14,%xmm6
2917 movdqu %xmm5,48(%rsi)
2918 pxor %xmm5,%xmm5
2919 pxor %xmm15,%xmm7
2920 movdqu %xmm6,64(%rsi)
2921 pxor %xmm6,%xmm6
2922 pxor %xmm9,%xmm8
2923 movdqu %xmm7,80(%rsi)
2924 pxor %xmm7,%xmm7
2925 leaq 96(%rsi),%rsi
2926 movdqa %xmm8,%xmm2
2927 pxor %xmm8,%xmm8
2928 pxor %xmm9,%xmm9
2929 jmp .Lcbc_dec_tail_collected
2931 .align 16
2932 .Lcbc_dec_loop6:
2933 movups %xmm7,(%rsi)
2934 leaq 16(%rsi),%rsi
2935 movdqu 0(%rdi),%xmm2
2936 movdqu 16(%rdi),%xmm3
2937 movdqa %xmm2,%xmm11
2938 movdqu 32(%rdi),%xmm4
2939 movdqa %xmm3,%xmm12
2940 movdqu 48(%rdi),%xmm5
2941 movdqa %xmm4,%xmm13
2942 movdqu 64(%rdi),%xmm6
2943 movdqa %xmm5,%xmm14
2944 movdqu 80(%rdi),%xmm7
2945 movdqa %xmm6,%xmm15
2946 .Lcbc_dec_loop6_enter:
2947 leaq 96(%rdi),%rdi
2948 movdqa %xmm7,%xmm8
2950 call _aesni_decrypt6
2952 pxor %xmm10,%xmm2
2953 movdqa %xmm8,%xmm10
2954 pxor %xmm11,%xmm3
2955 movdqu %xmm2,(%rsi)
2956 pxor %xmm12,%xmm4
2957 movdqu %xmm3,16(%rsi)
2958 pxor %xmm13,%xmm5
2959 movdqu %xmm4,32(%rsi)
2960 pxor %xmm14,%xmm6
2961 movq %r11,%rcx
2962 movdqu %xmm5,48(%rsi)
2963 pxor %xmm15,%xmm7
2964 movl %r10d,%eax
2965 movdqu %xmm6,64(%rsi)
2966 leaq 80(%rsi),%rsi
2967 subq $0x60,%rdx
2968 ja .Lcbc_dec_loop6
2970 movdqa %xmm7,%xmm2
2971 addq $0x50,%rdx
2972 jle .Lcbc_dec_clear_tail_collected
2973 movups %xmm7,(%rsi)
2974 leaq 16(%rsi),%rsi
2976 .Lcbc_dec_tail:
2977 movups (%rdi),%xmm2
2978 subq $0x10,%rdx
2979 jbe .Lcbc_dec_one
2981 movups 16(%rdi),%xmm3
2982 movaps %xmm2,%xmm11
2983 subq $0x10,%rdx
2984 jbe .Lcbc_dec_two
2986 movups 32(%rdi),%xmm4
2987 movaps %xmm3,%xmm12
2988 subq $0x10,%rdx
2989 jbe .Lcbc_dec_three
2991 movups 48(%rdi),%xmm5
2992 movaps %xmm4,%xmm13
2993 subq $0x10,%rdx
2994 jbe .Lcbc_dec_four
2996 movups 64(%rdi),%xmm6
2997 movaps %xmm5,%xmm14
2998 movaps %xmm6,%xmm15
2999 xorps %xmm7,%xmm7
3000 call _aesni_decrypt6
3001 pxor %xmm10,%xmm2
3002 movaps %xmm15,%xmm10
3003 pxor %xmm11,%xmm3
3004 movdqu %xmm2,(%rsi)
3005 pxor %xmm12,%xmm4
3006 movdqu %xmm3,16(%rsi)
3007 pxor %xmm3,%xmm3
3008 pxor %xmm13,%xmm5
3009 movdqu %xmm4,32(%rsi)
3010 pxor %xmm4,%xmm4
3011 pxor %xmm14,%xmm6
3012 movdqu %xmm5,48(%rsi)
3013 pxor %xmm5,%xmm5
3014 leaq 64(%rsi),%rsi
3015 movdqa %xmm6,%xmm2
3016 pxor %xmm6,%xmm6
3017 pxor %xmm7,%xmm7
3018 subq $0x10,%rdx
3019 jmp .Lcbc_dec_tail_collected
3021 .align 16
3022 .Lcbc_dec_one:
3023 movaps %xmm2,%xmm11
3024 movups (%rcx),%xmm0
3025 movups 16(%rcx),%xmm1
3026 leaq 32(%rcx),%rcx
3027 xorps %xmm0,%xmm2
3028 .Loop_dec1_17:
3029 .byte 102,15,56,222,209
3030 decl %eax
3031 movups (%rcx),%xmm1
3032 leaq 16(%rcx),%rcx
3033 jnz .Loop_dec1_17
3034 .byte 102,15,56,223,209
3035 xorps %xmm10,%xmm2
3036 movaps %xmm11,%xmm10
3037 jmp .Lcbc_dec_tail_collected
3038 .align 16
3039 .Lcbc_dec_two:
3040 movaps %xmm3,%xmm12
3041 call _aesni_decrypt2
3042 pxor %xmm10,%xmm2
3043 movaps %xmm12,%xmm10
3044 pxor %xmm11,%xmm3
3045 movdqu %xmm2,(%rsi)
3046 movdqa %xmm3,%xmm2
3047 pxor %xmm3,%xmm3
3048 leaq 16(%rsi),%rsi
3049 jmp .Lcbc_dec_tail_collected
3050 .align 16
3051 .Lcbc_dec_three:
3052 movaps %xmm4,%xmm13
3053 call _aesni_decrypt3
3054 pxor %xmm10,%xmm2
3055 movaps %xmm13,%xmm10
3056 pxor %xmm11,%xmm3
3057 movdqu %xmm2,(%rsi)
3058 pxor %xmm12,%xmm4
3059 movdqu %xmm3,16(%rsi)
3060 pxor %xmm3,%xmm3
3061 movdqa %xmm4,%xmm2
3062 pxor %xmm4,%xmm4
3063 leaq 32(%rsi),%rsi
3064 jmp .Lcbc_dec_tail_collected
3065 .align 16
3066 .Lcbc_dec_four:
3067 movaps %xmm5,%xmm14
3068 call _aesni_decrypt4
3069 pxor %xmm10,%xmm2
3070 movaps %xmm14,%xmm10
3071 pxor %xmm11,%xmm3
3072 movdqu %xmm2,(%rsi)
3073 pxor %xmm12,%xmm4
3074 movdqu %xmm3,16(%rsi)
3075 pxor %xmm3,%xmm3
3076 pxor %xmm13,%xmm5
3077 movdqu %xmm4,32(%rsi)
3078 pxor %xmm4,%xmm4
3079 movdqa %xmm5,%xmm2
3080 pxor %xmm5,%xmm5
3081 leaq 48(%rsi),%rsi
3082 jmp .Lcbc_dec_tail_collected
3084 .align 16
3085 .Lcbc_dec_clear_tail_collected:
3086 pxor %xmm3,%xmm3
3087 pxor %xmm4,%xmm4
3088 pxor %xmm5,%xmm5
3089 pxor %xmm6,%xmm6
3090 pxor %xmm7,%xmm7
3091 pxor %xmm8,%xmm8
3092 pxor %xmm9,%xmm9
3093 .Lcbc_dec_tail_collected:
3094 movups %xmm10,(%r8)
3095 andq $15,%rdx
3096 jnz .Lcbc_dec_tail_partial
3097 movups %xmm2,(%rsi)
3098 pxor %xmm2,%xmm2
3099 jmp .Lcbc_dec_ret
3100 .align 16
3101 .Lcbc_dec_tail_partial:
3102 movaps %xmm2,(%rsp)
3103 pxor %xmm2,%xmm2
3104 movq $16,%rcx
3105 movq %rsi,%rdi
3106 subq %rdx,%rcx
3107 leaq (%rsp),%rsi
3108 .long 0x9066A4F3
3109 movdqa %xmm2,(%rsp)
3111 .Lcbc_dec_ret:
3112 xorps %xmm0,%xmm0
3113 pxor %xmm1,%xmm1
3114 leaq (%rbp),%rsp
3115 popq %rbp
3116 .Lcbc_ret:
3117 .byte 0xf3,0xc3
3118 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt
#-----------------------------------------------------------------------
# int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
#                           AES_KEY *key)
# ABI:  SysV AMD64.  In: rdi = userKey, esi = bits, rdx = key schedule.
# Out:  eax = 0 on success, non-zero error from the key expansion.
#
# Builds the encryption schedule via __aesni_set_encrypt_key, then
# converts it in place for use with aesdec: the first and last round
# keys are swapped, and every inner round key is run through AESIMC
# (InvMixColumns) while the schedule is reversed end-to-end.
#-----------------------------------------------------------------------
.globl aesni_set_decrypt_key
.type aesni_set_decrypt_key,@function
.align 16
aesni_set_decrypt_key:
.byte 0x48,0x83,0xEC,0x08                # subq $8,%rsp (hand-encoded)
call __aesni_set_encrypt_key             # expand key; on return %esi still
                                         # holds the stored rounds value
shll $4,%esi                             # esi = rounds*16
testl %eax,%eax
jnz .Ldec_key_ret                        # propagate expansion failure
leaq 16(%rdx,%rsi,1),%rdi                # rdi -> last round key in schedule

# Swap the outermost pair (key[0] <-> key[last]) untouched by AESIMC.
movups (%rdx),%xmm0
movups (%rdi),%xmm1
movups %xmm0,(%rdi)
movups %xmm1,(%rdx)
leaq 16(%rdx),%rdx                       # rdx walks forward,
leaq -16(%rdi),%rdi                      # rdi walks backward

.Ldec_key_inverse:
# Invariant: rdx/rdi bracket the not-yet-converted inner round keys.
movups (%rdx),%xmm0
movups (%rdi),%xmm1
.byte 102,15,56,219,192                  # aesimc %xmm0,%xmm0
.byte 102,15,56,219,201                  # aesimc %xmm1,%xmm1
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
movups %xmm0,16(%rdi)                    # store each converted key into
movups %xmm1,-16(%rdx)                   # the mirror slot (reversal)
cmpq %rdx,%rdi
ja .Ldec_key_inverse

# Middle round key (odd count): convert in place, no swap partner.
movups (%rdx),%xmm0
.byte 102,15,56,219,192                  # aesimc %xmm0,%xmm0
pxor %xmm1,%xmm1                         # scrub key material from regs
movups %xmm0,(%rdi)
pxor %xmm0,%xmm0
.Ldec_key_ret:
addq $8,%rsp                             # undo the entry subq
.byte 0xf3,0xc3                          # rep ret
.LSEH_end_set_decrypt_key:
.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
#-----------------------------------------------------------------------
# int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
#                           AES_KEY *key)
# ABI:  SysV AMD64.  In: rdi = userKey, esi = bits (128/192/256),
#       rdx = key schedule buffer.
# Out:  eax = 0 ok, -1 NULL argument, -2 bad key length.
#       Round keys at rdx+0,16,...; rounds count stored at rdx+240
#       (9/11/13 — the aesenc iteration count used by the cipher
#       routines in this file, see the `movl 240(%rdx),%eax` loops).
#
# Two code paths per key size: the classic AESKEYGENASSIST-based
# expansion (via the .Lkey_expansion_* helpers), and an "_alt" path
# built from PSHUFB+AESENCLAST, selected from OPENSSL_ia32cap_P
# capability bits (mask 0x10000800; taken when the result equals
# 0x10000000 — presumably AVX-without-XOP, TODO confirm bit layout).
#-----------------------------------------------------------------------
.globl aesni_set_encrypt_key
.type aesni_set_encrypt_key,@function
.align 16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte 0x48,0x83,0xEC,0x08                # subq $8,%rsp (hand-encoded)
movq $-1,%rax                            # default return: -1
testq %rdi,%rdi
jz .Lenc_key_ret                         # NULL userKey
testq %rdx,%rdx
jz .Lenc_key_ret                         # NULL schedule

movl $268437504,%r10d                    # 0x10000800 capability mask
movups (%rdi),%xmm0                      # xmm0 = first 128 bits of key
xorps %xmm4,%xmm4                        # helpers expect xmm4 = 0
andl OPENSSL_ia32cap_P+4(%rip),%r10d     # word 1 of OpenSSL cap vector
leaq 16(%rdx),%rax                       # rax = output cursor (key[1])
cmpl $256,%esi
je .L14rounds
cmpl $192,%esi
je .L12rounds
cmpl $128,%esi
jne .Lbad_keybits                        # only 128/192/256 accepted

.L10rounds:                              # ---- AES-128 ----
movl $9,%esi                             # rounds value to store
cmpl $268435456,%r10d                    # == 0x10000000 ?
je .L10rounds_alt

movups %xmm0,(%rdx)                      # key[0] = raw user key
# aeskeygenassist $rcon,%xmm0,%xmm1 for rcon = 1,2,...,0x36
.byte 102,15,58,223,200,1
call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2
call .Lkey_expansion_128
.byte 102,15,58,223,200,4
call .Lkey_expansion_128
.byte 102,15,58,223,200,8
call .Lkey_expansion_128
.byte 102,15,58,223,200,16
call .Lkey_expansion_128
.byte 102,15,58,223,200,32
call .Lkey_expansion_128
.byte 102,15,58,223,200,64
call .Lkey_expansion_128
.byte 102,15,58,223,200,128
call .Lkey_expansion_128
.byte 102,15,58,223,200,27
call .Lkey_expansion_128
.byte 102,15,58,223,200,54
call .Lkey_expansion_128
movups %xmm0,(%rax)                      # key[10] (rdx+160)
movl %esi,80(%rax)                       # rounds at rdx+240
xorl %eax,%eax                           # return 0
jmp .Lenc_key_ret

.align 16
.L10rounds_alt:                          # AES-128, PSHUFB/AESENCLAST path
movdqa .Lkey_rotate(%rip),%xmm5          # RotWord byte-shuffle mask
movl $8,%r10d                            # 8 loop rounds + 2 unrolled
movdqa .Lkey_rcon1(%rip),%xmm4           # rcon, doubled each round
movdqa %xmm0,%xmm2
movdqu %xmm0,(%rdx)                      # key[0]
jmp .Loop_key128

.align 16
.Loop_key128:
.byte 102,15,56,0,197                    # pshufb %xmm5,%xmm0 (RotWord)
.byte 102,15,56,221,196                  # aesenclast %xmm4,%xmm0 (SubWord+rcon)
pslld $1,%xmm4                           # rcon <<= 1
leaq 16(%rax),%rax

# Fold previous key: t ^= t<<32 ^ t<<64 ^ t<<96
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,-16(%rax)                   # emit next round key
movdqa %xmm0,%xmm2

decl %r10d
jnz .Loop_key128

movdqa .Lkey_rcon1b(%rip),%xmm4          # rcon 0x1b for round 9

# Round 9 (same body, unrolled)
.byte 102,15,56,0,197                    # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                  # aesenclast %xmm4,%xmm0
pslld $1,%xmm4                           # -> rcon 0x36

movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)                      # key[9]

# Round 10 (final)
movdqa %xmm0,%xmm2
.byte 102,15,56,0,197                    # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                  # aesenclast %xmm4,%xmm0

movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,16(%rax)                    # key[10]

movl %esi,96(%rax)                       # rounds at rdx+240
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.L12rounds:                              # ---- AES-192 ----
movq 16(%rdi),%xmm2                      # upper 64 bits of user key
movl $11,%esi
cmpl $268435456,%r10d
je .L12rounds_alt

movups %xmm0,(%rdx)                      # key[0]
# aeskeygenassist $rcon,%xmm2,%xmm1, alternating 192a/192b helpers
.byte 102,15,58,223,202,1
call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2
call .Lkey_expansion_192b
.byte 102,15,58,223,202,4
call .Lkey_expansion_192a
.byte 102,15,58,223,202,8
call .Lkey_expansion_192b
.byte 102,15,58,223,202,16
call .Lkey_expansion_192a
.byte 102,15,58,223,202,32
call .Lkey_expansion_192b
.byte 102,15,58,223,202,64
call .Lkey_expansion_192a
.byte 102,15,58,223,202,128
call .Lkey_expansion_192b
movups %xmm0,(%rax)                      # last round key
movl %esi,48(%rax)                       # rounds at rdx+240
xorq %rax,%rax
jmp .Lenc_key_ret

.align 16
.L12rounds_alt:                          # AES-192, PSHUFB/AESENCLAST path
movdqa .Lkey_rotate192(%rip),%xmm5       # 192-bit RotWord shuffle mask
movdqa .Lkey_rcon1(%rip),%xmm4
movl $8,%r10d                            # 8 iterations, 24 bytes each
movdqu %xmm0,(%rdx)
jmp .Loop_key192

.align 16
.Loop_key192:
movq %xmm2,0(%rax)                       # emit 64-bit tail of prev key
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213                    # pshufb %xmm5,%xmm2
.byte 102,15,56,221,212                  # aesenclast %xmm4,%xmm2
pslld $1,%xmm4                           # rcon <<= 1
leaq 24(%rax),%rax                       # 192-bit stride

# Fold low 128 bits: x ^= x<<32 ^ x<<64 ^ x<<96
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0

# Propagate into the 64-bit tail
pshufd $0xff,%xmm0,%xmm3
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3

pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%rax)                   # emit next 128-bit chunk

decl %r10d
jnz .Loop_key192

movl %esi,32(%rax)                       # rounds at rdx+240
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.L14rounds:                              # ---- AES-256 ----
movups 16(%rdi),%xmm2                    # upper 128 bits of user key
movl $13,%esi
leaq 16(%rax),%rax                       # cursor -> key[2]
cmpl $268435456,%r10d
je .L14rounds_alt

movups %xmm0,(%rdx)                      # key[0]
movups %xmm2,16(%rdx)                    # key[1]
# aeskeygenassist: $rcon,%xmm2,%xmm1 (256a) then $rcon,%xmm0,%xmm1 (256b)
.byte 102,15,58,223,202,1
call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,2
call .Lkey_expansion_256a
.byte 102,15,58,223,200,2
call .Lkey_expansion_256b
.byte 102,15,58,223,202,4
call .Lkey_expansion_256a
.byte 102,15,58,223,200,4
call .Lkey_expansion_256b
.byte 102,15,58,223,202,8
call .Lkey_expansion_256a
.byte 102,15,58,223,200,8
call .Lkey_expansion_256b
.byte 102,15,58,223,202,16
call .Lkey_expansion_256a
.byte 102,15,58,223,200,16
call .Lkey_expansion_256b
.byte 102,15,58,223,202,32
call .Lkey_expansion_256a
.byte 102,15,58,223,200,32
call .Lkey_expansion_256b
.byte 102,15,58,223,202,64
call .Lkey_expansion_256a
movups %xmm0,(%rax)                      # key[14]
movl %esi,16(%rax)                       # rounds at rdx+240
xorq %rax,%rax
jmp .Lenc_key_ret

.align 16
.L14rounds_alt:                          # AES-256, PSHUFB/AESENCLAST path
movdqa .Lkey_rotate(%rip),%xmm5
movdqa .Lkey_rcon1(%rip),%xmm4
movl $7,%r10d                            # 7 iterations, 2 keys per pass
movdqu %xmm0,0(%rdx)                     # key[0]
movdqa %xmm2,%xmm1
movdqu %xmm2,16(%rdx)                    # key[1]
jmp .Loop_key256

.align 16
.Loop_key256:
.byte 102,15,56,0,213                    # pshufb %xmm5,%xmm2 (RotWord)
.byte 102,15,56,221,212                  # aesenclast %xmm4,%xmm2

# Even key: fold xmm0 and mix in the transformed word
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4                           # rcon <<= 1

pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)                      # emit even round key

decl %r10d
jz .Ldone_key256                         # last pass emits only one key

# Odd key: SubWord (no rotate, rcon=0 via zeroed xmm3)
pshufd $0xff,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211                  # aesenclast %xmm3,%xmm2

movdqa %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1

pxor %xmm1,%xmm2
movdqu %xmm2,16(%rax)                    # emit odd round key
leaq 32(%rax),%rax
movdqa %xmm2,%xmm1

jmp .Loop_key256

.Ldone_key256:
movl %esi,16(%rax)                       # rounds at rdx+240
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.Lbad_keybits:
movq $-2,%rax                            # unsupported key length
.Lenc_key_ret:
# Scrub all key material from SSE registers before returning.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
addq $8,%rsp                             # undo the entry subq
.byte 0xf3,0xc3                          # rep ret
.LSEH_end_set_encrypt_key:
.align 16
# AES-128 round-key step.  In: xmm0 = previous round key, xmm1 =
# aeskeygenassist output, xmm4 = 0 (scratch).  Emits the previous key
# at (%rax) and leaves the new key in xmm0.  "_cold" entry skips the
# store for the first invocation (key[0] was stored by the caller).
.Lkey_expansion_128:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_128_cold:
shufps $16,%xmm0,%xmm4                   # build x<<32 term
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4                  # build x<<64 ^ x<<96 term
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1                  # broadcast keygenassist word
xorps %xmm1,%xmm0                        # xmm0 = next round key
.byte 0xf3,0xc3                          # rep ret
.align 16
# AES-192 step "a": emit xmm0, then extend the 192-bit state
# (xmm0 = low 128 bits, xmm2 = high 64 bits) using the
# aeskeygenassist result in xmm1.  xmm4 = 0 scratch, xmm5 saves the
# old tail for the companion "b" step.
.Lkey_expansion_192a:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:
movaps %xmm2,%xmm5                       # remember old tail for 192b
.Lkey_expansion_192b_warm:
shufps $16,%xmm0,%xmm4                   # x<<32 fold term
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4                  # x<<64 ^ x<<96 fold term
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1                   # broadcast keygenassist word 1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0                         # new low 128 bits
pshufd $255,%xmm0,%xmm3                  # propagate last word
pxor %xmm3,%xmm2                         # new 64-bit tail
.byte 0xf3,0xc3                          # rep ret
.align 16
# AES-192 step "b": repack the saved tail (xmm5) with the current
# state into two 128-bit schedule entries, emit both, then fall into
# the shared expansion body above.
.Lkey_expansion_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5                   # xmm5 = tail | low half of key
movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3                   # xmm3 = high half | new tail
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp .Lkey_expansion_192b_warm
.align 16
# AES-256 even step: emit xmm2 (the odd key), then derive the next
# even round key into xmm0 from the aeskeygenassist result in xmm1.
# Same fold as the 128-bit step; xmm4 = 0 scratch.
.Lkey_expansion_256a:
movups %xmm2,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
shufps $16,%xmm0,%xmm4                   # x<<32 fold term
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4                  # x<<64 ^ x<<96 fold term
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1                  # broadcast keygenassist word
xorps %xmm1,%xmm0                        # xmm0 = next even round key
.byte 0xf3,0xc3                          # rep ret
.align 16
# AES-256 odd step: emit xmm0 (the even key), then derive the next
# odd round key into xmm2.  Uses shuffle $170 (broadcast word 2 —
# SubWord without RotWord/rcon, as the AES-256 schedule requires on
# odd steps).
.Lkey_expansion_256b:
movups %xmm0,(%rax)
leaq 16(%rax),%rax

shufps $16,%xmm2,%xmm4                   # x<<32 fold term
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4                  # x<<64 ^ x<<96 fold term
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1                  # broadcast keygenassist word 2
xorps %xmm1,%xmm2                        # xmm2 = next odd round key
.byte 0xf3,0xc3                          # rep ret
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant pool shared by the AES-NI routines in this file.
.align 64
.Lbswap_mask:                            # pshufb mask: reverse all 16 bytes
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:                           # per-lane dword increment of 6
.long 6,6,6,0                            # (presumably for a 6-wide CTR
                                         # loop — caller not visible here)
.Lincrement64:                           # low-qword increment of 1
.long 1,0,0,0
.Lxts_magic:                             # XTS tweak constant (GF(2^128)
.long 0x87,0,1,0                         # reduction polynomial 0x87)
.Lincrement1:                            # big-endian 128-bit +1
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate:                            # pshufb RotWord mask (key sched)
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:                         # pshufb mask for 192-bit schedule
.long 0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1:                             # initial round constant
.long 1,1,1,1
.Lkey_rcon1b:                            # round constant 0x1b (post-0x80)
.long 0x1b,0x1b,0x1b,0x1b

# "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" banner
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.section .note.GNU-stack,"",%progbits    # mark stack non-executable