updated licenses
[gnutls.git] / lib / accelerated / x86 / coff / appro-aes-x86-coff.s
blob d00c02f2859bf19d2f4fbdede1852053f6bcf0ea
1 # Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
2 # All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions
6 # are met:
7 #
8 # * Redistributions of source code must retain copyright notices,
9 # this list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following
13 # disclaimer in the documentation and/or other materials
14 # provided with the distribution.
16 # * Neither the name of the Andy Polyakov nor the names of its
17 # copyright holder and contributors may be used to endorse or
18 # promote products derived from this software without specific
19 # prior written permission.
21 # ALTERNATIVELY, provided that this notice is retained in full, this
22 # product may be distributed under the terms of the GNU General Public
23 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
24 # those given above.
26 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
27 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 # *** This file is auto-generated ***
40 .file "devel/perlasm/aesni-x86.s"
41 .text
42 .globl _aesni_encrypt
43 .def _aesni_encrypt; .scl 2; .type 32; .endef
44 .align 16
45 _aesni_encrypt:
46 .L_aesni_encrypt_begin:
47 movl 4(%esp),%eax
48 movl 12(%esp),%edx
49 movups (%eax),%xmm2
50 movl 240(%edx),%ecx
51 movl 8(%esp),%eax
52 movups (%edx),%xmm0
53 movups 16(%edx),%xmm1
54 leal 32(%edx),%edx
55 xorps %xmm0,%xmm2
56 .L000enc1_loop_1:
57 .byte 102,15,56,220,209
58 decl %ecx
59 movups (%edx),%xmm1
60 leal 16(%edx),%edx
61 jnz .L000enc1_loop_1
62 .byte 102,15,56,221,209
63 movups %xmm2,(%eax)
64 ret
65 .globl _aesni_decrypt
66 .def _aesni_decrypt; .scl 2; .type 32; .endef
67 .align 16
68 _aesni_decrypt:
69 .L_aesni_decrypt_begin:
70 movl 4(%esp),%eax
71 movl 12(%esp),%edx
72 movups (%eax),%xmm2
73 movl 240(%edx),%ecx
74 movl 8(%esp),%eax
75 movups (%edx),%xmm0
76 movups 16(%edx),%xmm1
77 leal 32(%edx),%edx
78 xorps %xmm0,%xmm2
79 .L001dec1_loop_2:
80 .byte 102,15,56,222,209
81 decl %ecx
82 movups (%edx),%xmm1
83 leal 16(%edx),%edx
84 jnz .L001dec1_loop_2
85 .byte 102,15,56,223,209
86 movups %xmm2,(%eax)
87 ret
88 .def __aesni_encrypt3; .scl 3; .type 32; .endef
89 .align 16
90 __aesni_encrypt3:
91 movups (%edx),%xmm0
92 shrl $1,%ecx
93 movups 16(%edx),%xmm1
94 leal 32(%edx),%edx
95 xorps %xmm0,%xmm2
96 pxor %xmm0,%xmm3
97 pxor %xmm0,%xmm4
98 movups (%edx),%xmm0
99 .L002enc3_loop:
100 .byte 102,15,56,220,209
101 .byte 102,15,56,220,217
102 decl %ecx
103 .byte 102,15,56,220,225
104 movups 16(%edx),%xmm1
105 .byte 102,15,56,220,208
106 .byte 102,15,56,220,216
107 leal 32(%edx),%edx
108 .byte 102,15,56,220,224
109 movups (%edx),%xmm0
110 jnz .L002enc3_loop
111 .byte 102,15,56,220,209
112 .byte 102,15,56,220,217
113 .byte 102,15,56,220,225
114 .byte 102,15,56,221,208
115 .byte 102,15,56,221,216
116 .byte 102,15,56,221,224
118 .def __aesni_decrypt3; .scl 3; .type 32; .endef
119 .align 16
120 __aesni_decrypt3:
121 movups (%edx),%xmm0
122 shrl $1,%ecx
123 movups 16(%edx),%xmm1
124 leal 32(%edx),%edx
125 xorps %xmm0,%xmm2
126 pxor %xmm0,%xmm3
127 pxor %xmm0,%xmm4
128 movups (%edx),%xmm0
129 .L003dec3_loop:
130 .byte 102,15,56,222,209
131 .byte 102,15,56,222,217
132 decl %ecx
133 .byte 102,15,56,222,225
134 movups 16(%edx),%xmm1
135 .byte 102,15,56,222,208
136 .byte 102,15,56,222,216
137 leal 32(%edx),%edx
138 .byte 102,15,56,222,224
139 movups (%edx),%xmm0
140 jnz .L003dec3_loop
141 .byte 102,15,56,222,209
142 .byte 102,15,56,222,217
143 .byte 102,15,56,222,225
144 .byte 102,15,56,223,208
145 .byte 102,15,56,223,216
146 .byte 102,15,56,223,224
148 .def __aesni_encrypt4; .scl 3; .type 32; .endef
149 .align 16
150 __aesni_encrypt4:
151 movups (%edx),%xmm0
152 movups 16(%edx),%xmm1
153 shrl $1,%ecx
154 leal 32(%edx),%edx
155 xorps %xmm0,%xmm2
156 pxor %xmm0,%xmm3
157 pxor %xmm0,%xmm4
158 pxor %xmm0,%xmm5
159 movups (%edx),%xmm0
160 .L004enc4_loop:
161 .byte 102,15,56,220,209
162 .byte 102,15,56,220,217
163 decl %ecx
164 .byte 102,15,56,220,225
165 .byte 102,15,56,220,233
166 movups 16(%edx),%xmm1
167 .byte 102,15,56,220,208
168 .byte 102,15,56,220,216
169 leal 32(%edx),%edx
170 .byte 102,15,56,220,224
171 .byte 102,15,56,220,232
172 movups (%edx),%xmm0
173 jnz .L004enc4_loop
174 .byte 102,15,56,220,209
175 .byte 102,15,56,220,217
176 .byte 102,15,56,220,225
177 .byte 102,15,56,220,233
178 .byte 102,15,56,221,208
179 .byte 102,15,56,221,216
180 .byte 102,15,56,221,224
181 .byte 102,15,56,221,232
183 .def __aesni_decrypt4; .scl 3; .type 32; .endef
184 .align 16
185 __aesni_decrypt4:
186 movups (%edx),%xmm0
187 movups 16(%edx),%xmm1
188 shrl $1,%ecx
189 leal 32(%edx),%edx
190 xorps %xmm0,%xmm2
191 pxor %xmm0,%xmm3
192 pxor %xmm0,%xmm4
193 pxor %xmm0,%xmm5
194 movups (%edx),%xmm0
195 .L005dec4_loop:
196 .byte 102,15,56,222,209
197 .byte 102,15,56,222,217
198 decl %ecx
199 .byte 102,15,56,222,225
200 .byte 102,15,56,222,233
201 movups 16(%edx),%xmm1
202 .byte 102,15,56,222,208
203 .byte 102,15,56,222,216
204 leal 32(%edx),%edx
205 .byte 102,15,56,222,224
206 .byte 102,15,56,222,232
207 movups (%edx),%xmm0
208 jnz .L005dec4_loop
209 .byte 102,15,56,222,209
210 .byte 102,15,56,222,217
211 .byte 102,15,56,222,225
212 .byte 102,15,56,222,233
213 .byte 102,15,56,223,208
214 .byte 102,15,56,223,216
215 .byte 102,15,56,223,224
216 .byte 102,15,56,223,232
218 .def __aesni_encrypt6; .scl 3; .type 32; .endef
219 .align 16
220 __aesni_encrypt6:
221 movups (%edx),%xmm0
222 shrl $1,%ecx
223 movups 16(%edx),%xmm1
224 leal 32(%edx),%edx
225 xorps %xmm0,%xmm2
226 pxor %xmm0,%xmm3
227 .byte 102,15,56,220,209
228 pxor %xmm0,%xmm4
229 .byte 102,15,56,220,217
230 pxor %xmm0,%xmm5
231 decl %ecx
232 .byte 102,15,56,220,225
233 pxor %xmm0,%xmm6
234 .byte 102,15,56,220,233
235 pxor %xmm0,%xmm7
236 .byte 102,15,56,220,241
237 movups (%edx),%xmm0
238 .byte 102,15,56,220,249
239 jmp .L_aesni_encrypt6_enter
240 .align 16
241 .L006enc6_loop:
242 .byte 102,15,56,220,209
243 .byte 102,15,56,220,217
244 decl %ecx
245 .byte 102,15,56,220,225
246 .byte 102,15,56,220,233
247 .byte 102,15,56,220,241
248 .byte 102,15,56,220,249
249 .align 16
250 .L_aesni_encrypt6_enter:
251 movups 16(%edx),%xmm1
252 .byte 102,15,56,220,208
253 .byte 102,15,56,220,216
254 leal 32(%edx),%edx
255 .byte 102,15,56,220,224
256 .byte 102,15,56,220,232
257 .byte 102,15,56,220,240
258 .byte 102,15,56,220,248
259 movups (%edx),%xmm0
260 jnz .L006enc6_loop
261 .byte 102,15,56,220,209
262 .byte 102,15,56,220,217
263 .byte 102,15,56,220,225
264 .byte 102,15,56,220,233
265 .byte 102,15,56,220,241
266 .byte 102,15,56,220,249
267 .byte 102,15,56,221,208
268 .byte 102,15,56,221,216
269 .byte 102,15,56,221,224
270 .byte 102,15,56,221,232
271 .byte 102,15,56,221,240
272 .byte 102,15,56,221,248
274 .def __aesni_decrypt6; .scl 3; .type 32; .endef
275 .align 16
276 __aesni_decrypt6:
277 movups (%edx),%xmm0
278 shrl $1,%ecx
279 movups 16(%edx),%xmm1
280 leal 32(%edx),%edx
281 xorps %xmm0,%xmm2
282 pxor %xmm0,%xmm3
283 .byte 102,15,56,222,209
284 pxor %xmm0,%xmm4
285 .byte 102,15,56,222,217
286 pxor %xmm0,%xmm5
287 decl %ecx
288 .byte 102,15,56,222,225
289 pxor %xmm0,%xmm6
290 .byte 102,15,56,222,233
291 pxor %xmm0,%xmm7
292 .byte 102,15,56,222,241
293 movups (%edx),%xmm0
294 .byte 102,15,56,222,249
295 jmp .L_aesni_decrypt6_enter
296 .align 16
297 .L007dec6_loop:
298 .byte 102,15,56,222,209
299 .byte 102,15,56,222,217
300 decl %ecx
301 .byte 102,15,56,222,225
302 .byte 102,15,56,222,233
303 .byte 102,15,56,222,241
304 .byte 102,15,56,222,249
305 .align 16
306 .L_aesni_decrypt6_enter:
307 movups 16(%edx),%xmm1
308 .byte 102,15,56,222,208
309 .byte 102,15,56,222,216
310 leal 32(%edx),%edx
311 .byte 102,15,56,222,224
312 .byte 102,15,56,222,232
313 .byte 102,15,56,222,240
314 .byte 102,15,56,222,248
315 movups (%edx),%xmm0
316 jnz .L007dec6_loop
317 .byte 102,15,56,222,209
318 .byte 102,15,56,222,217
319 .byte 102,15,56,222,225
320 .byte 102,15,56,222,233
321 .byte 102,15,56,222,241
322 .byte 102,15,56,222,249
323 .byte 102,15,56,223,208
324 .byte 102,15,56,223,216
325 .byte 102,15,56,223,224
326 .byte 102,15,56,223,232
327 .byte 102,15,56,223,240
328 .byte 102,15,56,223,248
330 .globl _aesni_ecb_encrypt
331 .def _aesni_ecb_encrypt; .scl 2; .type 32; .endef
332 .align 16
333 _aesni_ecb_encrypt:
334 .L_aesni_ecb_encrypt_begin:
335 pushl %ebp
336 pushl %ebx
337 pushl %esi
338 pushl %edi
339 movl 20(%esp),%esi
340 movl 24(%esp),%edi
341 movl 28(%esp),%eax
342 movl 32(%esp),%edx
343 movl 36(%esp),%ebx
344 andl $-16,%eax
345 jz .L008ecb_ret
346 movl 240(%edx),%ecx
347 testl %ebx,%ebx
348 jz .L009ecb_decrypt
349 movl %edx,%ebp
350 movl %ecx,%ebx
351 cmpl $96,%eax
352 jb .L010ecb_enc_tail
353 movdqu (%esi),%xmm2
354 movdqu 16(%esi),%xmm3
355 movdqu 32(%esi),%xmm4
356 movdqu 48(%esi),%xmm5
357 movdqu 64(%esi),%xmm6
358 movdqu 80(%esi),%xmm7
359 leal 96(%esi),%esi
360 subl $96,%eax
361 jmp .L011ecb_enc_loop6_enter
362 .align 16
363 .L012ecb_enc_loop6:
364 movups %xmm2,(%edi)
365 movdqu (%esi),%xmm2
366 movups %xmm3,16(%edi)
367 movdqu 16(%esi),%xmm3
368 movups %xmm4,32(%edi)
369 movdqu 32(%esi),%xmm4
370 movups %xmm5,48(%edi)
371 movdqu 48(%esi),%xmm5
372 movups %xmm6,64(%edi)
373 movdqu 64(%esi),%xmm6
374 movups %xmm7,80(%edi)
375 leal 96(%edi),%edi
376 movdqu 80(%esi),%xmm7
377 leal 96(%esi),%esi
378 .L011ecb_enc_loop6_enter:
379 call __aesni_encrypt6
380 movl %ebp,%edx
381 movl %ebx,%ecx
382 subl $96,%eax
383 jnc .L012ecb_enc_loop6
384 movups %xmm2,(%edi)
385 movups %xmm3,16(%edi)
386 movups %xmm4,32(%edi)
387 movups %xmm5,48(%edi)
388 movups %xmm6,64(%edi)
389 movups %xmm7,80(%edi)
390 leal 96(%edi),%edi
391 addl $96,%eax
392 jz .L008ecb_ret
393 .L010ecb_enc_tail:
394 movups (%esi),%xmm2
395 cmpl $32,%eax
396 jb .L013ecb_enc_one
397 movups 16(%esi),%xmm3
398 je .L014ecb_enc_two
399 movups 32(%esi),%xmm4
400 cmpl $64,%eax
401 jb .L015ecb_enc_three
402 movups 48(%esi),%xmm5
403 je .L016ecb_enc_four
404 movups 64(%esi),%xmm6
405 xorps %xmm7,%xmm7
406 call __aesni_encrypt6
407 movups %xmm2,(%edi)
408 movups %xmm3,16(%edi)
409 movups %xmm4,32(%edi)
410 movups %xmm5,48(%edi)
411 movups %xmm6,64(%edi)
412 jmp .L008ecb_ret
413 .align 16
414 .L013ecb_enc_one:
415 movups (%edx),%xmm0
416 movups 16(%edx),%xmm1
417 leal 32(%edx),%edx
418 xorps %xmm0,%xmm2
419 .L017enc1_loop_3:
420 .byte 102,15,56,220,209
421 decl %ecx
422 movups (%edx),%xmm1
423 leal 16(%edx),%edx
424 jnz .L017enc1_loop_3
425 .byte 102,15,56,221,209
426 movups %xmm2,(%edi)
427 jmp .L008ecb_ret
428 .align 16
429 .L014ecb_enc_two:
430 xorps %xmm4,%xmm4
431 call __aesni_encrypt3
432 movups %xmm2,(%edi)
433 movups %xmm3,16(%edi)
434 jmp .L008ecb_ret
435 .align 16
436 .L015ecb_enc_three:
437 call __aesni_encrypt3
438 movups %xmm2,(%edi)
439 movups %xmm3,16(%edi)
440 movups %xmm4,32(%edi)
441 jmp .L008ecb_ret
442 .align 16
443 .L016ecb_enc_four:
444 call __aesni_encrypt4
445 movups %xmm2,(%edi)
446 movups %xmm3,16(%edi)
447 movups %xmm4,32(%edi)
448 movups %xmm5,48(%edi)
449 jmp .L008ecb_ret
450 .align 16
451 .L009ecb_decrypt:
452 movl %edx,%ebp
453 movl %ecx,%ebx
454 cmpl $96,%eax
455 jb .L018ecb_dec_tail
456 movdqu (%esi),%xmm2
457 movdqu 16(%esi),%xmm3
458 movdqu 32(%esi),%xmm4
459 movdqu 48(%esi),%xmm5
460 movdqu 64(%esi),%xmm6
461 movdqu 80(%esi),%xmm7
462 leal 96(%esi),%esi
463 subl $96,%eax
464 jmp .L019ecb_dec_loop6_enter
465 .align 16
466 .L020ecb_dec_loop6:
467 movups %xmm2,(%edi)
468 movdqu (%esi),%xmm2
469 movups %xmm3,16(%edi)
470 movdqu 16(%esi),%xmm3
471 movups %xmm4,32(%edi)
472 movdqu 32(%esi),%xmm4
473 movups %xmm5,48(%edi)
474 movdqu 48(%esi),%xmm5
475 movups %xmm6,64(%edi)
476 movdqu 64(%esi),%xmm6
477 movups %xmm7,80(%edi)
478 leal 96(%edi),%edi
479 movdqu 80(%esi),%xmm7
480 leal 96(%esi),%esi
481 .L019ecb_dec_loop6_enter:
482 call __aesni_decrypt6
483 movl %ebp,%edx
484 movl %ebx,%ecx
485 subl $96,%eax
486 jnc .L020ecb_dec_loop6
487 movups %xmm2,(%edi)
488 movups %xmm3,16(%edi)
489 movups %xmm4,32(%edi)
490 movups %xmm5,48(%edi)
491 movups %xmm6,64(%edi)
492 movups %xmm7,80(%edi)
493 leal 96(%edi),%edi
494 addl $96,%eax
495 jz .L008ecb_ret
496 .L018ecb_dec_tail:
497 movups (%esi),%xmm2
498 cmpl $32,%eax
499 jb .L021ecb_dec_one
500 movups 16(%esi),%xmm3
501 je .L022ecb_dec_two
502 movups 32(%esi),%xmm4
503 cmpl $64,%eax
504 jb .L023ecb_dec_three
505 movups 48(%esi),%xmm5
506 je .L024ecb_dec_four
507 movups 64(%esi),%xmm6
508 xorps %xmm7,%xmm7
509 call __aesni_decrypt6
510 movups %xmm2,(%edi)
511 movups %xmm3,16(%edi)
512 movups %xmm4,32(%edi)
513 movups %xmm5,48(%edi)
514 movups %xmm6,64(%edi)
515 jmp .L008ecb_ret
516 .align 16
517 .L021ecb_dec_one:
518 movups (%edx),%xmm0
519 movups 16(%edx),%xmm1
520 leal 32(%edx),%edx
521 xorps %xmm0,%xmm2
522 .L025dec1_loop_4:
523 .byte 102,15,56,222,209
524 decl %ecx
525 movups (%edx),%xmm1
526 leal 16(%edx),%edx
527 jnz .L025dec1_loop_4
528 .byte 102,15,56,223,209
529 movups %xmm2,(%edi)
530 jmp .L008ecb_ret
531 .align 16
532 .L022ecb_dec_two:
533 xorps %xmm4,%xmm4
534 call __aesni_decrypt3
535 movups %xmm2,(%edi)
536 movups %xmm3,16(%edi)
537 jmp .L008ecb_ret
538 .align 16
539 .L023ecb_dec_three:
540 call __aesni_decrypt3
541 movups %xmm2,(%edi)
542 movups %xmm3,16(%edi)
543 movups %xmm4,32(%edi)
544 jmp .L008ecb_ret
545 .align 16
546 .L024ecb_dec_four:
547 call __aesni_decrypt4
548 movups %xmm2,(%edi)
549 movups %xmm3,16(%edi)
550 movups %xmm4,32(%edi)
551 movups %xmm5,48(%edi)
552 .L008ecb_ret:
553 popl %edi
554 popl %esi
555 popl %ebx
556 popl %ebp
558 .globl _aesni_ccm64_encrypt_blocks
559 .def _aesni_ccm64_encrypt_blocks; .scl 2; .type 32; .endef
560 .align 16
561 _aesni_ccm64_encrypt_blocks:
562 .L_aesni_ccm64_encrypt_blocks_begin:
563 pushl %ebp
564 pushl %ebx
565 pushl %esi
566 pushl %edi
567 movl 20(%esp),%esi
568 movl 24(%esp),%edi
569 movl 28(%esp),%eax
570 movl 32(%esp),%edx
571 movl 36(%esp),%ebx
572 movl 40(%esp),%ecx
573 movl %esp,%ebp
574 subl $60,%esp
575 andl $-16,%esp
576 movl %ebp,48(%esp)
577 movdqu (%ebx),%xmm7
578 movdqu (%ecx),%xmm3
579 movl 240(%edx),%ecx
580 movl $202182159,(%esp)
581 movl $134810123,4(%esp)
582 movl $67438087,8(%esp)
583 movl $66051,12(%esp)
584 movl $1,%ebx
585 xorl %ebp,%ebp
586 movl %ebx,16(%esp)
587 movl %ebp,20(%esp)
588 movl %ebp,24(%esp)
589 movl %ebp,28(%esp)
590 shrl $1,%ecx
591 leal (%edx),%ebp
592 movdqa (%esp),%xmm5
593 movdqa %xmm7,%xmm2
594 movl %ecx,%ebx
595 .byte 102,15,56,0,253
596 .L026ccm64_enc_outer:
597 movups (%ebp),%xmm0
598 movl %ebx,%ecx
599 movups (%esi),%xmm6
600 xorps %xmm0,%xmm2
601 movups 16(%ebp),%xmm1
602 xorps %xmm6,%xmm0
603 leal 32(%ebp),%edx
604 xorps %xmm0,%xmm3
605 movups (%edx),%xmm0
606 .L027ccm64_enc2_loop:
607 .byte 102,15,56,220,209
608 decl %ecx
609 .byte 102,15,56,220,217
610 movups 16(%edx),%xmm1
611 .byte 102,15,56,220,208
612 leal 32(%edx),%edx
613 .byte 102,15,56,220,216
614 movups (%edx),%xmm0
615 jnz .L027ccm64_enc2_loop
616 .byte 102,15,56,220,209
617 .byte 102,15,56,220,217
618 paddq 16(%esp),%xmm7
619 .byte 102,15,56,221,208
620 .byte 102,15,56,221,216
621 decl %eax
622 leal 16(%esi),%esi
623 xorps %xmm2,%xmm6
624 movdqa %xmm7,%xmm2
625 movups %xmm6,(%edi)
626 leal 16(%edi),%edi
627 .byte 102,15,56,0,213
628 jnz .L026ccm64_enc_outer
629 movl 48(%esp),%esp
630 movl 40(%esp),%edi
631 movups %xmm3,(%edi)
632 popl %edi
633 popl %esi
634 popl %ebx
635 popl %ebp
637 .globl _aesni_ccm64_decrypt_blocks
638 .def _aesni_ccm64_decrypt_blocks; .scl 2; .type 32; .endef
639 .align 16
640 _aesni_ccm64_decrypt_blocks:
641 .L_aesni_ccm64_decrypt_blocks_begin:
642 pushl %ebp
643 pushl %ebx
644 pushl %esi
645 pushl %edi
646 movl 20(%esp),%esi
647 movl 24(%esp),%edi
648 movl 28(%esp),%eax
649 movl 32(%esp),%edx
650 movl 36(%esp),%ebx
651 movl 40(%esp),%ecx
652 movl %esp,%ebp
653 subl $60,%esp
654 andl $-16,%esp
655 movl %ebp,48(%esp)
656 movdqu (%ebx),%xmm7
657 movdqu (%ecx),%xmm3
658 movl 240(%edx),%ecx
659 movl $202182159,(%esp)
660 movl $134810123,4(%esp)
661 movl $67438087,8(%esp)
662 movl $66051,12(%esp)
663 movl $1,%ebx
664 xorl %ebp,%ebp
665 movl %ebx,16(%esp)
666 movl %ebp,20(%esp)
667 movl %ebp,24(%esp)
668 movl %ebp,28(%esp)
669 movdqa (%esp),%xmm5
670 movdqa %xmm7,%xmm2
671 movl %edx,%ebp
672 movl %ecx,%ebx
673 .byte 102,15,56,0,253
674 movups (%edx),%xmm0
675 movups 16(%edx),%xmm1
676 leal 32(%edx),%edx
677 xorps %xmm0,%xmm2
678 .L028enc1_loop_5:
679 .byte 102,15,56,220,209
680 decl %ecx
681 movups (%edx),%xmm1
682 leal 16(%edx),%edx
683 jnz .L028enc1_loop_5
684 .byte 102,15,56,221,209
685 movups (%esi),%xmm6
686 paddq 16(%esp),%xmm7
687 leal 16(%esi),%esi
688 jmp .L029ccm64_dec_outer
689 .align 16
690 .L029ccm64_dec_outer:
691 xorps %xmm2,%xmm6
692 movdqa %xmm7,%xmm2
693 movl %ebx,%ecx
694 movups %xmm6,(%edi)
695 leal 16(%edi),%edi
696 .byte 102,15,56,0,213
697 subl $1,%eax
698 jz .L030ccm64_dec_break
699 movups (%ebp),%xmm0
700 shrl $1,%ecx
701 movups 16(%ebp),%xmm1
702 xorps %xmm0,%xmm6
703 leal 32(%ebp),%edx
704 xorps %xmm0,%xmm2
705 xorps %xmm6,%xmm3
706 movups (%edx),%xmm0
707 .L031ccm64_dec2_loop:
708 .byte 102,15,56,220,209
709 decl %ecx
710 .byte 102,15,56,220,217
711 movups 16(%edx),%xmm1
712 .byte 102,15,56,220,208
713 leal 32(%edx),%edx
714 .byte 102,15,56,220,216
715 movups (%edx),%xmm0
716 jnz .L031ccm64_dec2_loop
717 movups (%esi),%xmm6
718 paddq 16(%esp),%xmm7
719 .byte 102,15,56,220,209
720 .byte 102,15,56,220,217
721 leal 16(%esi),%esi
722 .byte 102,15,56,221,208
723 .byte 102,15,56,221,216
724 jmp .L029ccm64_dec_outer
725 .align 16
726 .L030ccm64_dec_break:
727 movl %ebp,%edx
728 movups (%edx),%xmm0
729 movups 16(%edx),%xmm1
730 xorps %xmm0,%xmm6
731 leal 32(%edx),%edx
732 xorps %xmm6,%xmm3
733 .L032enc1_loop_6:
734 .byte 102,15,56,220,217
735 decl %ecx
736 movups (%edx),%xmm1
737 leal 16(%edx),%edx
738 jnz .L032enc1_loop_6
739 .byte 102,15,56,221,217
740 movl 48(%esp),%esp
741 movl 40(%esp),%edi
742 movups %xmm3,(%edi)
743 popl %edi
744 popl %esi
745 popl %ebx
746 popl %ebp
748 .globl _aesni_ctr32_encrypt_blocks
749 .def _aesni_ctr32_encrypt_blocks; .scl 2; .type 32; .endef
750 .align 16
751 _aesni_ctr32_encrypt_blocks:
752 .L_aesni_ctr32_encrypt_blocks_begin:
753 pushl %ebp
754 pushl %ebx
755 pushl %esi
756 pushl %edi
757 movl 20(%esp),%esi
758 movl 24(%esp),%edi
759 movl 28(%esp),%eax
760 movl 32(%esp),%edx
761 movl 36(%esp),%ebx
762 movl %esp,%ebp
763 subl $88,%esp
764 andl $-16,%esp
765 movl %ebp,80(%esp)
766 cmpl $1,%eax
767 je .L033ctr32_one_shortcut
768 movdqu (%ebx),%xmm7
769 movl $202182159,(%esp)
770 movl $134810123,4(%esp)
771 movl $67438087,8(%esp)
772 movl $66051,12(%esp)
773 movl $6,%ecx
774 xorl %ebp,%ebp
775 movl %ecx,16(%esp)
776 movl %ecx,20(%esp)
777 movl %ecx,24(%esp)
778 movl %ebp,28(%esp)
779 .byte 102,15,58,22,251,3
780 .byte 102,15,58,34,253,3
781 movl 240(%edx),%ecx
782 bswap %ebx
783 pxor %xmm1,%xmm1
784 pxor %xmm0,%xmm0
785 movdqa (%esp),%xmm2
786 .byte 102,15,58,34,203,0
787 leal 3(%ebx),%ebp
788 .byte 102,15,58,34,197,0
789 incl %ebx
790 .byte 102,15,58,34,203,1
791 incl %ebp
792 .byte 102,15,58,34,197,1
793 incl %ebx
794 .byte 102,15,58,34,203,2
795 incl %ebp
796 .byte 102,15,58,34,197,2
797 movdqa %xmm1,48(%esp)
798 .byte 102,15,56,0,202
799 movdqa %xmm0,64(%esp)
800 .byte 102,15,56,0,194
801 pshufd $192,%xmm1,%xmm2
802 pshufd $128,%xmm1,%xmm3
803 cmpl $6,%eax
804 jb .L034ctr32_tail
805 movdqa %xmm7,32(%esp)
806 shrl $1,%ecx
807 movl %edx,%ebp
808 movl %ecx,%ebx
809 subl $6,%eax
810 jmp .L035ctr32_loop6
811 .align 16
812 .L035ctr32_loop6:
813 pshufd $64,%xmm1,%xmm4
814 movdqa 32(%esp),%xmm1
815 pshufd $192,%xmm0,%xmm5
816 por %xmm1,%xmm2
817 pshufd $128,%xmm0,%xmm6
818 por %xmm1,%xmm3
819 pshufd $64,%xmm0,%xmm7
820 por %xmm1,%xmm4
821 por %xmm1,%xmm5
822 por %xmm1,%xmm6
823 por %xmm1,%xmm7
824 movups (%ebp),%xmm0
825 movups 16(%ebp),%xmm1
826 leal 32(%ebp),%edx
827 decl %ecx
828 pxor %xmm0,%xmm2
829 pxor %xmm0,%xmm3
830 .byte 102,15,56,220,209
831 pxor %xmm0,%xmm4
832 .byte 102,15,56,220,217
833 pxor %xmm0,%xmm5
834 .byte 102,15,56,220,225
835 pxor %xmm0,%xmm6
836 .byte 102,15,56,220,233
837 pxor %xmm0,%xmm7
838 .byte 102,15,56,220,241
839 movups (%edx),%xmm0
840 .byte 102,15,56,220,249
841 call .L_aesni_encrypt6_enter
842 movups (%esi),%xmm1
843 movups 16(%esi),%xmm0
844 xorps %xmm1,%xmm2
845 movups 32(%esi),%xmm1
846 xorps %xmm0,%xmm3
847 movups %xmm2,(%edi)
848 movdqa 16(%esp),%xmm0
849 xorps %xmm1,%xmm4
850 movdqa 48(%esp),%xmm1
851 movups %xmm3,16(%edi)
852 movups %xmm4,32(%edi)
853 paddd %xmm0,%xmm1
854 paddd 64(%esp),%xmm0
855 movdqa (%esp),%xmm2
856 movups 48(%esi),%xmm3
857 movups 64(%esi),%xmm4
858 xorps %xmm3,%xmm5
859 movups 80(%esi),%xmm3
860 leal 96(%esi),%esi
861 movdqa %xmm1,48(%esp)
862 .byte 102,15,56,0,202
863 xorps %xmm4,%xmm6
864 movups %xmm5,48(%edi)
865 xorps %xmm3,%xmm7
866 movdqa %xmm0,64(%esp)
867 .byte 102,15,56,0,194
868 movups %xmm6,64(%edi)
869 pshufd $192,%xmm1,%xmm2
870 movups %xmm7,80(%edi)
871 leal 96(%edi),%edi
872 movl %ebx,%ecx
873 pshufd $128,%xmm1,%xmm3
874 subl $6,%eax
875 jnc .L035ctr32_loop6
876 addl $6,%eax
877 jz .L036ctr32_ret
878 movl %ebp,%edx
879 leal 1(,%ecx,2),%ecx
880 movdqa 32(%esp),%xmm7
881 .L034ctr32_tail:
882 por %xmm7,%xmm2
883 cmpl $2,%eax
884 jb .L037ctr32_one
885 pshufd $64,%xmm1,%xmm4
886 por %xmm7,%xmm3
887 je .L038ctr32_two
888 pshufd $192,%xmm0,%xmm5
889 por %xmm7,%xmm4
890 cmpl $4,%eax
891 jb .L039ctr32_three
892 pshufd $128,%xmm0,%xmm6
893 por %xmm7,%xmm5
894 je .L040ctr32_four
895 por %xmm7,%xmm6
896 call __aesni_encrypt6
897 movups (%esi),%xmm1
898 movups 16(%esi),%xmm0
899 xorps %xmm1,%xmm2
900 movups 32(%esi),%xmm1
901 xorps %xmm0,%xmm3
902 movups 48(%esi),%xmm0
903 xorps %xmm1,%xmm4
904 movups 64(%esi),%xmm1
905 xorps %xmm0,%xmm5
906 movups %xmm2,(%edi)
907 xorps %xmm1,%xmm6
908 movups %xmm3,16(%edi)
909 movups %xmm4,32(%edi)
910 movups %xmm5,48(%edi)
911 movups %xmm6,64(%edi)
912 jmp .L036ctr32_ret
913 .align 16
914 .L033ctr32_one_shortcut:
915 movups (%ebx),%xmm2
916 movl 240(%edx),%ecx
917 .L037ctr32_one:
918 movups (%edx),%xmm0
919 movups 16(%edx),%xmm1
920 leal 32(%edx),%edx
921 xorps %xmm0,%xmm2
922 .L041enc1_loop_7:
923 .byte 102,15,56,220,209
924 decl %ecx
925 movups (%edx),%xmm1
926 leal 16(%edx),%edx
927 jnz .L041enc1_loop_7
928 .byte 102,15,56,221,209
929 movups (%esi),%xmm6
930 xorps %xmm2,%xmm6
931 movups %xmm6,(%edi)
932 jmp .L036ctr32_ret
933 .align 16
934 .L038ctr32_two:
935 call __aesni_encrypt3
936 movups (%esi),%xmm5
937 movups 16(%esi),%xmm6
938 xorps %xmm5,%xmm2
939 xorps %xmm6,%xmm3
940 movups %xmm2,(%edi)
941 movups %xmm3,16(%edi)
942 jmp .L036ctr32_ret
943 .align 16
944 .L039ctr32_three:
945 call __aesni_encrypt3
946 movups (%esi),%xmm5
947 movups 16(%esi),%xmm6
948 xorps %xmm5,%xmm2
949 movups 32(%esi),%xmm7
950 xorps %xmm6,%xmm3
951 movups %xmm2,(%edi)
952 xorps %xmm7,%xmm4
953 movups %xmm3,16(%edi)
954 movups %xmm4,32(%edi)
955 jmp .L036ctr32_ret
956 .align 16
957 .L040ctr32_four:
958 call __aesni_encrypt4
959 movups (%esi),%xmm6
960 movups 16(%esi),%xmm7
961 movups 32(%esi),%xmm1
962 xorps %xmm6,%xmm2
963 movups 48(%esi),%xmm0
964 xorps %xmm7,%xmm3
965 movups %xmm2,(%edi)
966 xorps %xmm1,%xmm4
967 movups %xmm3,16(%edi)
968 xorps %xmm0,%xmm5
969 movups %xmm4,32(%edi)
970 movups %xmm5,48(%edi)
971 .L036ctr32_ret:
972 movl 80(%esp),%esp
973 popl %edi
974 popl %esi
975 popl %ebx
976 popl %ebp
978 .globl _aesni_xts_encrypt
979 .def _aesni_xts_encrypt; .scl 2; .type 32; .endef
980 .align 16
981 _aesni_xts_encrypt:
982 .L_aesni_xts_encrypt_begin:
983 pushl %ebp
984 pushl %ebx
985 pushl %esi
986 pushl %edi
987 movl 36(%esp),%edx
988 movl 40(%esp),%esi
989 movl 240(%edx),%ecx
990 movups (%esi),%xmm2
991 movups (%edx),%xmm0
992 movups 16(%edx),%xmm1
993 leal 32(%edx),%edx
994 xorps %xmm0,%xmm2
995 .L042enc1_loop_8:
996 .byte 102,15,56,220,209
997 decl %ecx
998 movups (%edx),%xmm1
999 leal 16(%edx),%edx
1000 jnz .L042enc1_loop_8
1001 .byte 102,15,56,221,209
1002 movl 20(%esp),%esi
1003 movl 24(%esp),%edi
1004 movl 28(%esp),%eax
1005 movl 32(%esp),%edx
1006 movl %esp,%ebp
1007 subl $120,%esp
1008 movl 240(%edx),%ecx
1009 andl $-16,%esp
1010 movl $135,96(%esp)
1011 movl $0,100(%esp)
1012 movl $1,104(%esp)
1013 movl $0,108(%esp)
1014 movl %eax,112(%esp)
1015 movl %ebp,116(%esp)
1016 movdqa %xmm2,%xmm1
1017 pxor %xmm0,%xmm0
1018 movdqa 96(%esp),%xmm3
1019 pcmpgtd %xmm1,%xmm0
1020 andl $-16,%eax
1021 movl %edx,%ebp
1022 movl %ecx,%ebx
1023 subl $96,%eax
1024 jc .L043xts_enc_short
1025 shrl $1,%ecx
1026 movl %ecx,%ebx
1027 jmp .L044xts_enc_loop6
1028 .align 16
1029 .L044xts_enc_loop6:
1030 pshufd $19,%xmm0,%xmm2
1031 pxor %xmm0,%xmm0
1032 movdqa %xmm1,(%esp)
1033 paddq %xmm1,%xmm1
1034 pand %xmm3,%xmm2
1035 pcmpgtd %xmm1,%xmm0
1036 pxor %xmm2,%xmm1
1037 pshufd $19,%xmm0,%xmm2
1038 pxor %xmm0,%xmm0
1039 movdqa %xmm1,16(%esp)
1040 paddq %xmm1,%xmm1
1041 pand %xmm3,%xmm2
1042 pcmpgtd %xmm1,%xmm0
1043 pxor %xmm2,%xmm1
1044 pshufd $19,%xmm0,%xmm2
1045 pxor %xmm0,%xmm0
1046 movdqa %xmm1,32(%esp)
1047 paddq %xmm1,%xmm1
1048 pand %xmm3,%xmm2
1049 pcmpgtd %xmm1,%xmm0
1050 pxor %xmm2,%xmm1
1051 pshufd $19,%xmm0,%xmm2
1052 pxor %xmm0,%xmm0
1053 movdqa %xmm1,48(%esp)
1054 paddq %xmm1,%xmm1
1055 pand %xmm3,%xmm2
1056 pcmpgtd %xmm1,%xmm0
1057 pxor %xmm2,%xmm1
1058 pshufd $19,%xmm0,%xmm7
1059 movdqa %xmm1,64(%esp)
1060 paddq %xmm1,%xmm1
1061 movups (%ebp),%xmm0
1062 pand %xmm3,%xmm7
1063 movups (%esi),%xmm2
1064 pxor %xmm1,%xmm7
1065 movdqu 16(%esi),%xmm3
1066 xorps %xmm0,%xmm2
1067 movdqu 32(%esi),%xmm4
1068 pxor %xmm0,%xmm3
1069 movdqu 48(%esi),%xmm5
1070 pxor %xmm0,%xmm4
1071 movdqu 64(%esi),%xmm6
1072 pxor %xmm0,%xmm5
1073 movdqu 80(%esi),%xmm1
1074 pxor %xmm0,%xmm6
1075 leal 96(%esi),%esi
1076 pxor (%esp),%xmm2
1077 movdqa %xmm7,80(%esp)
1078 pxor %xmm1,%xmm7
1079 movups 16(%ebp),%xmm1
1080 leal 32(%ebp),%edx
1081 pxor 16(%esp),%xmm3
1082 .byte 102,15,56,220,209
1083 pxor 32(%esp),%xmm4
1084 .byte 102,15,56,220,217
1085 pxor 48(%esp),%xmm5
1086 decl %ecx
1087 .byte 102,15,56,220,225
1088 pxor 64(%esp),%xmm6
1089 .byte 102,15,56,220,233
1090 pxor %xmm0,%xmm7
1091 .byte 102,15,56,220,241
1092 movups (%edx),%xmm0
1093 .byte 102,15,56,220,249
1094 call .L_aesni_encrypt6_enter
1095 movdqa 80(%esp),%xmm1
1096 pxor %xmm0,%xmm0
1097 xorps (%esp),%xmm2
1098 pcmpgtd %xmm1,%xmm0
1099 xorps 16(%esp),%xmm3
1100 movups %xmm2,(%edi)
1101 xorps 32(%esp),%xmm4
1102 movups %xmm3,16(%edi)
1103 xorps 48(%esp),%xmm5
1104 movups %xmm4,32(%edi)
1105 xorps 64(%esp),%xmm6
1106 movups %xmm5,48(%edi)
1107 xorps %xmm1,%xmm7
1108 movups %xmm6,64(%edi)
1109 pshufd $19,%xmm0,%xmm2
1110 movups %xmm7,80(%edi)
1111 leal 96(%edi),%edi
1112 movdqa 96(%esp),%xmm3
1113 pxor %xmm0,%xmm0
1114 paddq %xmm1,%xmm1
1115 pand %xmm3,%xmm2
1116 pcmpgtd %xmm1,%xmm0
1117 movl %ebx,%ecx
1118 pxor %xmm2,%xmm1
1119 subl $96,%eax
1120 jnc .L044xts_enc_loop6
1121 leal 1(,%ecx,2),%ecx
1122 movl %ebp,%edx
1123 movl %ecx,%ebx
1124 .L043xts_enc_short:
1125 addl $96,%eax
1126 jz .L045xts_enc_done6x
1127 movdqa %xmm1,%xmm5
1128 cmpl $32,%eax
1129 jb .L046xts_enc_one
1130 pshufd $19,%xmm0,%xmm2
1131 pxor %xmm0,%xmm0
1132 paddq %xmm1,%xmm1
1133 pand %xmm3,%xmm2
1134 pcmpgtd %xmm1,%xmm0
1135 pxor %xmm2,%xmm1
1136 je .L047xts_enc_two
1137 pshufd $19,%xmm0,%xmm2
1138 pxor %xmm0,%xmm0
1139 movdqa %xmm1,%xmm6
1140 paddq %xmm1,%xmm1
1141 pand %xmm3,%xmm2
1142 pcmpgtd %xmm1,%xmm0
1143 pxor %xmm2,%xmm1
1144 cmpl $64,%eax
1145 jb .L048xts_enc_three
1146 pshufd $19,%xmm0,%xmm2
1147 pxor %xmm0,%xmm0
1148 movdqa %xmm1,%xmm7
1149 paddq %xmm1,%xmm1
1150 pand %xmm3,%xmm2
1151 pcmpgtd %xmm1,%xmm0
1152 pxor %xmm2,%xmm1
1153 movdqa %xmm5,(%esp)
1154 movdqa %xmm6,16(%esp)
1155 je .L049xts_enc_four
1156 movdqa %xmm7,32(%esp)
1157 pshufd $19,%xmm0,%xmm7
1158 movdqa %xmm1,48(%esp)
1159 paddq %xmm1,%xmm1
1160 pand %xmm3,%xmm7
1161 pxor %xmm1,%xmm7
1162 movdqu (%esi),%xmm2
1163 movdqu 16(%esi),%xmm3
1164 movdqu 32(%esi),%xmm4
1165 pxor (%esp),%xmm2
1166 movdqu 48(%esi),%xmm5
1167 pxor 16(%esp),%xmm3
1168 movdqu 64(%esi),%xmm6
1169 pxor 32(%esp),%xmm4
1170 leal 80(%esi),%esi
1171 pxor 48(%esp),%xmm5
1172 movdqa %xmm7,64(%esp)
1173 pxor %xmm7,%xmm6
1174 call __aesni_encrypt6
1175 movaps 64(%esp),%xmm1
1176 xorps (%esp),%xmm2
1177 xorps 16(%esp),%xmm3
1178 xorps 32(%esp),%xmm4
1179 movups %xmm2,(%edi)
1180 xorps 48(%esp),%xmm5
1181 movups %xmm3,16(%edi)
1182 xorps %xmm1,%xmm6
1183 movups %xmm4,32(%edi)
1184 movups %xmm5,48(%edi)
1185 movups %xmm6,64(%edi)
1186 leal 80(%edi),%edi
1187 jmp .L050xts_enc_done
1188 .align 16
1189 .L046xts_enc_one:
1190 movups (%esi),%xmm2
1191 leal 16(%esi),%esi
1192 xorps %xmm5,%xmm2
1193 movups (%edx),%xmm0
1194 movups 16(%edx),%xmm1
1195 leal 32(%edx),%edx
1196 xorps %xmm0,%xmm2
1197 .L051enc1_loop_9:
1198 .byte 102,15,56,220,209
1199 decl %ecx
1200 movups (%edx),%xmm1
1201 leal 16(%edx),%edx
1202 jnz .L051enc1_loop_9
1203 .byte 102,15,56,221,209
1204 xorps %xmm5,%xmm2
1205 movups %xmm2,(%edi)
1206 leal 16(%edi),%edi
1207 movdqa %xmm5,%xmm1
1208 jmp .L050xts_enc_done
1209 .align 16
1210 .L047xts_enc_two:
1211 movaps %xmm1,%xmm6
1212 movups (%esi),%xmm2
1213 movups 16(%esi),%xmm3
1214 leal 32(%esi),%esi
1215 xorps %xmm5,%xmm2
1216 xorps %xmm6,%xmm3
1217 xorps %xmm4,%xmm4
1218 call __aesni_encrypt3
1219 xorps %xmm5,%xmm2
1220 xorps %xmm6,%xmm3
1221 movups %xmm2,(%edi)
1222 movups %xmm3,16(%edi)
1223 leal 32(%edi),%edi
1224 movdqa %xmm6,%xmm1
1225 jmp .L050xts_enc_done
1226 .align 16
1227 .L048xts_enc_three:
1228 movaps %xmm1,%xmm7
1229 movups (%esi),%xmm2
1230 movups 16(%esi),%xmm3
1231 movups 32(%esi),%xmm4
1232 leal 48(%esi),%esi
1233 xorps %xmm5,%xmm2
1234 xorps %xmm6,%xmm3
1235 xorps %xmm7,%xmm4
1236 call __aesni_encrypt3
1237 xorps %xmm5,%xmm2
1238 xorps %xmm6,%xmm3
1239 xorps %xmm7,%xmm4
1240 movups %xmm2,(%edi)
1241 movups %xmm3,16(%edi)
1242 movups %xmm4,32(%edi)
1243 leal 48(%edi),%edi
1244 movdqa %xmm7,%xmm1
1245 jmp .L050xts_enc_done
1246 .align 16
1247 .L049xts_enc_four:
1248 movaps %xmm1,%xmm6
1249 movups (%esi),%xmm2
1250 movups 16(%esi),%xmm3
1251 movups 32(%esi),%xmm4
1252 xorps (%esp),%xmm2
1253 movups 48(%esi),%xmm5
1254 leal 64(%esi),%esi
1255 xorps 16(%esp),%xmm3
1256 xorps %xmm7,%xmm4
1257 xorps %xmm6,%xmm5
1258 call __aesni_encrypt4
1259 xorps (%esp),%xmm2
1260 xorps 16(%esp),%xmm3
1261 xorps %xmm7,%xmm4
1262 movups %xmm2,(%edi)
1263 xorps %xmm6,%xmm5
1264 movups %xmm3,16(%edi)
1265 movups %xmm4,32(%edi)
1266 movups %xmm5,48(%edi)
1267 leal 64(%edi),%edi
1268 movdqa %xmm6,%xmm1
1269 jmp .L050xts_enc_done
1270 .align 16
1271 .L045xts_enc_done6x:
1272 movl 112(%esp),%eax
1273 andl $15,%eax
1274 jz .L052xts_enc_ret
1275 movdqa %xmm1,%xmm5
1276 movl %eax,112(%esp)
1277 jmp .L053xts_enc_steal
1278 .align 16
1279 .L050xts_enc_done:
1280 movl 112(%esp),%eax
1281 pxor %xmm0,%xmm0
1282 andl $15,%eax
1283 jz .L052xts_enc_ret
1284 pcmpgtd %xmm1,%xmm0
1285 movl %eax,112(%esp)
1286 pshufd $19,%xmm0,%xmm5
1287 paddq %xmm1,%xmm1
1288 pand 96(%esp),%xmm5
1289 pxor %xmm1,%xmm5
1290 .L053xts_enc_steal:
1291 movzbl (%esi),%ecx
1292 movzbl -16(%edi),%edx
1293 leal 1(%esi),%esi
1294 movb %cl,-16(%edi)
1295 movb %dl,(%edi)
1296 leal 1(%edi),%edi
1297 subl $1,%eax
1298 jnz .L053xts_enc_steal
1299 subl 112(%esp),%edi
1300 movl %ebp,%edx
1301 movl %ebx,%ecx
1302 movups -16(%edi),%xmm2
1303 xorps %xmm5,%xmm2
1304 movups (%edx),%xmm0
1305 movups 16(%edx),%xmm1
1306 leal 32(%edx),%edx
1307 xorps %xmm0,%xmm2
1308 .L054enc1_loop_10:
1309 .byte 102,15,56,220,209
1310 decl %ecx
1311 movups (%edx),%xmm1
1312 leal 16(%edx),%edx
1313 jnz .L054enc1_loop_10
1314 .byte 102,15,56,221,209
1315 xorps %xmm5,%xmm2
1316 movups %xmm2,-16(%edi)
1317 .L052xts_enc_ret:
1318 movl 116(%esp),%esp
1319 popl %edi
1320 popl %esi
1321 popl %ebx
1322 popl %ebp
#----------------------------------------------------------------------
# _aesni_xts_decrypt — AES-XTS bulk decryption, i386 AES-NI, cdecl.
# NOTE(review): argument layout inferred from the stack loads below
# (after the four register pushes): 20(%esp)=inp, 24(%esp)=out,
# 28(%esp)=len, 32(%esp)=key1 (data key), 36(%esp)=key2 (tweak key),
# 40(%esp)=iv — confirm against the C prototype.
# The IV is encrypted with key2 to form the initial tweak; each next
# tweak is the previous one multiplied by x in GF(2^128) (reduction
# constant 0x87, staged at 96(%esp)).  Bulk path does 6 blocks per
# iteration; the tail handles 1..5 blocks and ciphertext stealing.
# NOTE(review): the final `ret` (original line 1698) was missing from
# this extract; restored below.
#----------------------------------------------------------------------
1325 .globl _aesni_xts_decrypt
1326 .def _aesni_xts_decrypt; .scl 2; .type 32; .endef
1327 .align 16
1328 _aesni_xts_decrypt:
1329 .L_aesni_xts_decrypt_begin:
1330 pushl %ebp
1331 pushl %ebx
1332 pushl %esi
1333 pushl %edi
1334 movl 36(%esp),%edx
1335 movl 40(%esp),%esi
1336 movl 240(%edx),%ecx
# Encrypt the IV with key2 (single-block AES-NI loop) -> tweak in %xmm2.
1337 movups (%esi),%xmm2
1338 movups (%edx),%xmm0
1339 movups 16(%edx),%xmm1
1340 leal 32(%edx),%edx
1341 xorps %xmm0,%xmm2
1342 .L055enc1_loop_11:
1343 .byte 102,15,56,220,209
# ^ aesenc %xmm1,%xmm2
1344 decl %ecx
1345 movups (%edx),%xmm1
1346 leal 16(%edx),%edx
1347 jnz .L055enc1_loop_11
1348 .byte 102,15,56,221,209
# ^ aesenclast %xmm1,%xmm2
1349 movl 20(%esp),%esi
1350 movl 24(%esp),%edi
1351 movl 28(%esp),%eax
1352 movl 32(%esp),%edx
# Carve a 16-byte-aligned scratch frame; 116(%esp) keeps the caller %esp.
1353 movl %esp,%ebp
1354 subl $120,%esp
1355 andl $-16,%esp
# If len is not a multiple of 16, withhold one whole block for stealing.
1356 xorl %ebx,%ebx
1357 testl $15,%eax
1358 setnz %bl
1359 shll $4,%ebx
1360 subl %ebx,%eax
# GF(2^128) reduction constant 0x87 and lane pattern for the carry mask.
1361 movl $135,96(%esp)
1362 movl $0,100(%esp)
1363 movl $1,104(%esp)
1364 movl $0,108(%esp)
1365 movl %eax,112(%esp)
1366 movl %ebp,116(%esp)
1367 movl 240(%edx),%ecx
1368 movl %edx,%ebp
1369 movl %ecx,%ebx
1370 movdqa %xmm2,%xmm1
1371 pxor %xmm0,%xmm0
1372 movdqa 96(%esp),%xmm3
1373 pcmpgtd %xmm1,%xmm0
1374 andl $-16,%eax
1375 subl $96,%eax
1376 jc .L056xts_dec_short
1377 shrl $1,%ecx
1378 movl %ecx,%ebx
1379 jmp .L057xts_dec_loop6
1380 .align 16
# Main loop: derive six consecutive tweaks (pcmpgtd/pshufd/pand/pxor
# implements the carry-out and 0x87 reduction of tweak*x), stash them at
# 0..80(%esp), decrypt six blocks with the first round interleaved, then
# xor each tweak back into its plaintext block.
1381 .L057xts_dec_loop6:
1382 pshufd $19,%xmm0,%xmm2
1383 pxor %xmm0,%xmm0
1384 movdqa %xmm1,(%esp)
1385 paddq %xmm1,%xmm1
1386 pand %xmm3,%xmm2
1387 pcmpgtd %xmm1,%xmm0
1388 pxor %xmm2,%xmm1
1389 pshufd $19,%xmm0,%xmm2
1390 pxor %xmm0,%xmm0
1391 movdqa %xmm1,16(%esp)
1392 paddq %xmm1,%xmm1
1393 pand %xmm3,%xmm2
1394 pcmpgtd %xmm1,%xmm0
1395 pxor %xmm2,%xmm1
1396 pshufd $19,%xmm0,%xmm2
1397 pxor %xmm0,%xmm0
1398 movdqa %xmm1,32(%esp)
1399 paddq %xmm1,%xmm1
1400 pand %xmm3,%xmm2
1401 pcmpgtd %xmm1,%xmm0
1402 pxor %xmm2,%xmm1
1403 pshufd $19,%xmm0,%xmm2
1404 pxor %xmm0,%xmm0
1405 movdqa %xmm1,48(%esp)
1406 paddq %xmm1,%xmm1
1407 pand %xmm3,%xmm2
1408 pcmpgtd %xmm1,%xmm0
1409 pxor %xmm2,%xmm1
1410 pshufd $19,%xmm0,%xmm7
1411 movdqa %xmm1,64(%esp)
1412 paddq %xmm1,%xmm1
1413 movups (%ebp),%xmm0
1414 pand %xmm3,%xmm7
1415 movups (%esi),%xmm2
1416 pxor %xmm1,%xmm7
# Load 6 ciphertext blocks, xor in round-0 key and the per-block tweaks.
1417 movdqu 16(%esi),%xmm3
1418 xorps %xmm0,%xmm2
1419 movdqu 32(%esi),%xmm4
1420 pxor %xmm0,%xmm3
1421 movdqu 48(%esi),%xmm5
1422 pxor %xmm0,%xmm4
1423 movdqu 64(%esi),%xmm6
1424 pxor %xmm0,%xmm5
1425 movdqu 80(%esi),%xmm1
1426 pxor %xmm0,%xmm6
1427 leal 96(%esi),%esi
1428 pxor (%esp),%xmm2
1429 movdqa %xmm7,80(%esp)
1430 pxor %xmm1,%xmm7
1431 movups 16(%ebp),%xmm1
1432 leal 32(%ebp),%edx
1433 pxor 16(%esp),%xmm3
1434 .byte 102,15,56,222,209
# ^ aesdec %xmm1,%xmm2 (first round interleaved with tweak xors)
1435 pxor 32(%esp),%xmm4
1436 .byte 102,15,56,222,217
# ^ aesdec %xmm1,%xmm3
1437 pxor 48(%esp),%xmm5
1438 decl %ecx
1439 .byte 102,15,56,222,225
# ^ aesdec %xmm1,%xmm4
1440 pxor 64(%esp),%xmm6
1441 .byte 102,15,56,222,233
# ^ aesdec %xmm1,%xmm5
1442 pxor %xmm0,%xmm7
1443 .byte 102,15,56,222,241
# ^ aesdec %xmm1,%xmm6
1444 movups (%edx),%xmm0
1445 .byte 102,15,56,222,249
# ^ aesdec %xmm1,%xmm7
1446 call .L_aesni_decrypt6_enter
1447 movdqa 80(%esp),%xmm1
1448 pxor %xmm0,%xmm0
1449 xorps (%esp),%xmm2
1450 pcmpgtd %xmm1,%xmm0
1451 xorps 16(%esp),%xmm3
1452 movups %xmm2,(%edi)
1453 xorps 32(%esp),%xmm4
1454 movups %xmm3,16(%edi)
1455 xorps 48(%esp),%xmm5
1456 movups %xmm4,32(%edi)
1457 xorps 64(%esp),%xmm6
1458 movups %xmm5,48(%edi)
1459 xorps %xmm1,%xmm7
1460 movups %xmm6,64(%edi)
1461 pshufd $19,%xmm0,%xmm2
1462 movups %xmm7,80(%edi)
1463 leal 96(%edi),%edi
# Advance the tweak once more for the next iteration.
1464 movdqa 96(%esp),%xmm3
1465 pxor %xmm0,%xmm0
1466 paddq %xmm1,%xmm1
1467 pand %xmm3,%xmm2
1468 pcmpgtd %xmm1,%xmm0
1469 movl %ebx,%ecx
1470 pxor %xmm2,%xmm1
1471 subl $96,%eax
1472 jnc .L057xts_dec_loop6
1473 leal 1(,%ecx,2),%ecx
1474 movl %ebp,%edx
1475 movl %ecx,%ebx
# Tail: 0..5 whole blocks remain (count restored into %eax).
1476 .L056xts_dec_short:
1477 addl $96,%eax
1478 jz .L058xts_dec_done6x
1479 movdqa %xmm1,%xmm5
1480 cmpl $32,%eax
1481 jb .L059xts_dec_one
1482 pshufd $19,%xmm0,%xmm2
1483 pxor %xmm0,%xmm0
1484 paddq %xmm1,%xmm1
1485 pand %xmm3,%xmm2
1486 pcmpgtd %xmm1,%xmm0
1487 pxor %xmm2,%xmm1
1488 je .L060xts_dec_two
1489 pshufd $19,%xmm0,%xmm2
1490 pxor %xmm0,%xmm0
1491 movdqa %xmm1,%xmm6
1492 paddq %xmm1,%xmm1
1493 pand %xmm3,%xmm2
1494 pcmpgtd %xmm1,%xmm0
1495 pxor %xmm2,%xmm1
1496 cmpl $64,%eax
1497 jb .L061xts_dec_three
1498 pshufd $19,%xmm0,%xmm2
1499 pxor %xmm0,%xmm0
1500 movdqa %xmm1,%xmm7
1501 paddq %xmm1,%xmm1
1502 pand %xmm3,%xmm2
1503 pcmpgtd %xmm1,%xmm0
1504 pxor %xmm2,%xmm1
1505 movdqa %xmm5,(%esp)
1506 movdqa %xmm6,16(%esp)
1507 je .L062xts_dec_four
# Five blocks: tweaks at 0..64(%esp), decrypt-6 path with a dummy slot.
1508 movdqa %xmm7,32(%esp)
1509 pshufd $19,%xmm0,%xmm7
1510 movdqa %xmm1,48(%esp)
1511 paddq %xmm1,%xmm1
1512 pand %xmm3,%xmm7
1513 pxor %xmm1,%xmm7
1514 movdqu (%esi),%xmm2
1515 movdqu 16(%esi),%xmm3
1516 movdqu 32(%esi),%xmm4
1517 pxor (%esp),%xmm2
1518 movdqu 48(%esi),%xmm5
1519 pxor 16(%esp),%xmm3
1520 movdqu 64(%esi),%xmm6
1521 pxor 32(%esp),%xmm4
1522 leal 80(%esi),%esi
1523 pxor 48(%esp),%xmm5
1524 movdqa %xmm7,64(%esp)
1525 pxor %xmm7,%xmm6
1526 call __aesni_decrypt6
1527 movaps 64(%esp),%xmm1
1528 xorps (%esp),%xmm2
1529 xorps 16(%esp),%xmm3
1530 xorps 32(%esp),%xmm4
1531 movups %xmm2,(%edi)
1532 xorps 48(%esp),%xmm5
1533 movups %xmm3,16(%edi)
1534 xorps %xmm1,%xmm6
1535 movups %xmm4,32(%edi)
1536 movups %xmm5,48(%edi)
1537 movups %xmm6,64(%edi)
1538 leal 80(%edi),%edi
1539 jmp .L063xts_dec_done
1540 .align 16
1541 .L059xts_dec_one:
1542 movups (%esi),%xmm2
1543 leal 16(%esi),%esi
1544 xorps %xmm5,%xmm2
1545 movups (%edx),%xmm0
1546 movups 16(%edx),%xmm1
1547 leal 32(%edx),%edx
1548 xorps %xmm0,%xmm2
1549 .L064dec1_loop_12:
1550 .byte 102,15,56,222,209
# ^ aesdec %xmm1,%xmm2
1551 decl %ecx
1552 movups (%edx),%xmm1
1553 leal 16(%edx),%edx
1554 jnz .L064dec1_loop_12
1555 .byte 102,15,56,223,209
# ^ aesdeclast %xmm1,%xmm2
1556 xorps %xmm5,%xmm2
1557 movups %xmm2,(%edi)
1558 leal 16(%edi),%edi
1559 movdqa %xmm5,%xmm1
1560 jmp .L063xts_dec_done
1561 .align 16
1562 .L060xts_dec_two:
1563 movaps %xmm1,%xmm6
1564 movups (%esi),%xmm2
1565 movups 16(%esi),%xmm3
1566 leal 32(%esi),%esi
1567 xorps %xmm5,%xmm2
1568 xorps %xmm6,%xmm3
1569 call __aesni_decrypt3
1570 xorps %xmm5,%xmm2
1571 xorps %xmm6,%xmm3
1572 movups %xmm2,(%edi)
1573 movups %xmm3,16(%edi)
1574 leal 32(%edi),%edi
1575 movdqa %xmm6,%xmm1
1576 jmp .L063xts_dec_done
1577 .align 16
1578 .L061xts_dec_three:
1579 movaps %xmm1,%xmm7
1580 movups (%esi),%xmm2
1581 movups 16(%esi),%xmm3
1582 movups 32(%esi),%xmm4
1583 leal 48(%esi),%esi
1584 xorps %xmm5,%xmm2
1585 xorps %xmm6,%xmm3
1586 xorps %xmm7,%xmm4
1587 call __aesni_decrypt3
1588 xorps %xmm5,%xmm2
1589 xorps %xmm6,%xmm3
1590 xorps %xmm7,%xmm4
1591 movups %xmm2,(%edi)
1592 movups %xmm3,16(%edi)
1593 movups %xmm4,32(%edi)
1594 leal 48(%edi),%edi
1595 movdqa %xmm7,%xmm1
1596 jmp .L063xts_dec_done
1597 .align 16
1598 .L062xts_dec_four:
1599 movaps %xmm1,%xmm6
1600 movups (%esi),%xmm2
1601 movups 16(%esi),%xmm3
1602 movups 32(%esi),%xmm4
1603 xorps (%esp),%xmm2
1604 movups 48(%esi),%xmm5
1605 leal 64(%esi),%esi
1606 xorps 16(%esp),%xmm3
1607 xorps %xmm7,%xmm4
1608 xorps %xmm6,%xmm5
1609 call __aesni_decrypt4
1610 xorps (%esp),%xmm2
1611 xorps 16(%esp),%xmm3
1612 xorps %xmm7,%xmm4
1613 movups %xmm2,(%edi)
1614 xorps %xmm6,%xmm5
1615 movups %xmm3,16(%edi)
1616 movups %xmm4,32(%edi)
1617 movups %xmm5,48(%edi)
1618 leal 64(%edi),%edi
1619 movdqa %xmm6,%xmm1
1620 jmp .L063xts_dec_done
1621 .align 16
# Exit from the 6-block path: if a ragged tail exists we still need two
# more tweaks (one for the final full block, one for stealing).
1622 .L058xts_dec_done6x:
1623 movl 112(%esp),%eax
1624 andl $15,%eax
1625 jz .L065xts_dec_ret
1626 movl %eax,112(%esp)
1627 jmp .L066xts_dec_only_one_more
1628 .align 16
1629 .L063xts_dec_done:
1630 movl 112(%esp),%eax
1631 pxor %xmm0,%xmm0
1632 andl $15,%eax
1633 jz .L065xts_dec_ret
1634 pcmpgtd %xmm1,%xmm0
1635 movl %eax,112(%esp)
1636 pshufd $19,%xmm0,%xmm2
1637 pxor %xmm0,%xmm0
1638 movdqa 96(%esp),%xmm3
1639 paddq %xmm1,%xmm1
1640 pand %xmm3,%xmm2
1641 pcmpgtd %xmm1,%xmm0
1642 pxor %xmm2,%xmm1
1643 .L066xts_dec_only_one_more:
1644 pshufd $19,%xmm0,%xmm5
1645 movdqa %xmm1,%xmm6
1646 paddq %xmm1,%xmm1
1647 pand %xmm3,%xmm5
1648 pxor %xmm1,%xmm5
# Decrypt the last full block with the *next* tweak (%xmm5)...
1649 movl %ebp,%edx
1650 movl %ebx,%ecx
1651 movups (%esi),%xmm2
1652 xorps %xmm5,%xmm2
1653 movups (%edx),%xmm0
1654 movups 16(%edx),%xmm1
1655 leal 32(%edx),%edx
1656 xorps %xmm0,%xmm2
1657 .L067dec1_loop_13:
1658 .byte 102,15,56,222,209
# ^ aesdec %xmm1,%xmm2
1659 decl %ecx
1660 movups (%edx),%xmm1
1661 leal 16(%edx),%edx
1662 jnz .L067dec1_loop_13
1663 .byte 102,15,56,223,209
# ^ aesdeclast %xmm1,%xmm2
1664 xorps %xmm5,%xmm2
1665 movups %xmm2,(%edi)
# ...swap tail bytes with the partial ciphertext (ciphertext stealing)...
1666 .L068xts_dec_steal:
1667 movzbl 16(%esi),%ecx
1668 movzbl (%edi),%edx
1669 leal 1(%esi),%esi
1670 movb %cl,(%edi)
1671 movb %dl,16(%edi)
1672 leal 1(%edi),%edi
1673 subl $1,%eax
1674 jnz .L068xts_dec_steal
# ...and re-decrypt the stitched block with the held-back tweak (%xmm6).
1675 subl 112(%esp),%edi
1676 movl %ebp,%edx
1677 movl %ebx,%ecx
1678 movups (%edi),%xmm2
1679 xorps %xmm6,%xmm2
1680 movups (%edx),%xmm0
1681 movups 16(%edx),%xmm1
1682 leal 32(%edx),%edx
1683 xorps %xmm0,%xmm2
1684 .L069dec1_loop_14:
1685 .byte 102,15,56,222,209
# ^ aesdec %xmm1,%xmm2
1686 decl %ecx
1687 movups (%edx),%xmm1
1688 leal 16(%edx),%edx
1689 jnz .L069dec1_loop_14
1690 .byte 102,15,56,223,209
# ^ aesdeclast %xmm1,%xmm2
1691 xorps %xmm6,%xmm2
1692 movups %xmm2,(%edi)
1693 .L065xts_dec_ret:
1694 movl 116(%esp),%esp
1695 popl %edi
1696 popl %esi
1697 popl %ebx
1698 popl %ebp
1698 ret
#----------------------------------------------------------------------
# _aesni_cbc_encrypt — AES-CBC encrypt/decrypt, i386 AES-NI, cdecl.
# NOTE(review): argument layout inferred from the stack loads below:
# 20(%esp)=inp, 24(%esp)=out, 28(%esp)=len, 32(%esp)=key,
# 36(%esp)=ivec, 40(%esp)=enc flag (0 = decrypt) — confirm against the
# C prototype.  Encrypt path is serial (CBC chains); decrypt path runs
# 6 blocks per iteration with a 1..5 block tail.  The updated IV is
# written back through 36(%esp) on exit.
# NOTE(review): the final `ret` (original line 1935) was missing from
# this extract; restored below.
#----------------------------------------------------------------------
1699 .globl _aesni_cbc_encrypt
1700 .def _aesni_cbc_encrypt; .scl 2; .type 32; .endef
1701 .align 16
1702 _aesni_cbc_encrypt:
1703 .L_aesni_cbc_encrypt_begin:
1704 pushl %ebp
1705 pushl %ebx
1706 pushl %esi
1707 pushl %edi
1708 movl 20(%esp),%esi
1709 movl %esp,%ebx
1710 movl 24(%esp),%edi
1711 subl $24,%ebx
1712 movl 28(%esp),%eax
1713 andl $-16,%ebx
1714 movl 32(%esp),%edx
1715 movl 36(%esp),%ebp
1716 testl %eax,%eax
1717 jz .L070cbc_abort
# Switch to an aligned scratch frame; old %esp saved at 16(%esp).
1718 cmpl $0,40(%esp)
1719 xchgl %esp,%ebx
1720 movups (%ebp),%xmm7
1721 movl 240(%edx),%ecx
1722 movl %edx,%ebp
1723 movl %ebx,16(%esp)
1724 movl %ecx,%ebx
1725 je .L071cbc_decrypt
# --- CBC encrypt: %xmm7 carries the running IV/previous ciphertext ---
1726 movaps %xmm7,%xmm2
1727 cmpl $16,%eax
1728 jb .L072cbc_enc_tail
1729 subl $16,%eax
1730 jmp .L073cbc_enc_loop
1731 .align 16
1732 .L073cbc_enc_loop:
1733 movups (%esi),%xmm7
1734 leal 16(%esi),%esi
1735 movups (%edx),%xmm0
1736 movups 16(%edx),%xmm1
1737 xorps %xmm0,%xmm7
1738 leal 32(%edx),%edx
1739 xorps %xmm7,%xmm2
1740 .L074enc1_loop_15:
1741 .byte 102,15,56,220,209
# ^ aesenc %xmm1,%xmm2
1742 decl %ecx
1743 movups (%edx),%xmm1
1744 leal 16(%edx),%edx
1745 jnz .L074enc1_loop_15
1746 .byte 102,15,56,221,209
# ^ aesenclast %xmm1,%xmm2
1747 movl %ebx,%ecx
1748 movl %ebp,%edx
1749 movups %xmm2,(%edi)
1750 leal 16(%edi),%edi
1751 subl $16,%eax
1752 jnc .L073cbc_enc_loop
1753 addl $16,%eax
1754 jnz .L072cbc_enc_tail
1755 movaps %xmm2,%xmm7
1756 jmp .L075cbc_ret
# Partial final block: copy the tail, zero-pad to 16 bytes in place,
# then loop once more over the padded block.
1757 .L072cbc_enc_tail:
1758 movl %eax,%ecx
1759 .long 2767451785
# ^ bytes 89,F6,F3,A4 = mov %esi,%esi; rep movsb
1760 movl $16,%ecx
1761 subl %eax,%ecx
1762 xorl %eax,%eax
1763 .long 2868115081
# ^ bytes 89,F6,F3,AA = mov %esi,%esi; rep stosb (zero padding)
1764 leal -16(%edi),%edi
1765 movl %ebx,%ecx
1766 movl %edi,%esi
1767 movl %ebp,%edx
1768 jmp .L073cbc_enc_loop
1769 .align 16
# --- CBC decrypt: 6 blocks per iteration, IV kept at (%esp) ---
1770 .L071cbc_decrypt:
1771 cmpl $80,%eax
1772 jbe .L076cbc_dec_tail
1773 movaps %xmm7,(%esp)
1774 subl $80,%eax
1775 jmp .L077cbc_dec_loop6_enter
1776 .align 16
1777 .L078cbc_dec_loop6:
1778 movaps %xmm0,(%esp)
1779 movups %xmm7,(%edi)
1780 leal 16(%edi),%edi
1781 .L077cbc_dec_loop6_enter:
1782 movdqu (%esi),%xmm2
1783 movdqu 16(%esi),%xmm3
1784 movdqu 32(%esi),%xmm4
1785 movdqu 48(%esi),%xmm5
1786 movdqu 64(%esi),%xmm6
1787 movdqu 80(%esi),%xmm7
1788 call __aesni_decrypt6
# Xor each decrypted block with the previous ciphertext block (CBC).
1789 movups (%esi),%xmm1
1790 movups 16(%esi),%xmm0
1791 xorps (%esp),%xmm2
1792 xorps %xmm1,%xmm3
1793 movups 32(%esi),%xmm1
1794 xorps %xmm0,%xmm4
1795 movups 48(%esi),%xmm0
1796 xorps %xmm1,%xmm5
1797 movups 64(%esi),%xmm1
1798 xorps %xmm0,%xmm6
1799 movups 80(%esi),%xmm0
1800 xorps %xmm1,%xmm7
1801 movups %xmm2,(%edi)
1802 movups %xmm3,16(%edi)
1803 leal 96(%esi),%esi
1804 movups %xmm4,32(%edi)
1805 movl %ebx,%ecx
1806 movups %xmm5,48(%edi)
1807 movl %ebp,%edx
1808 movups %xmm6,64(%edi)
1809 leal 80(%edi),%edi
1810 subl $96,%eax
1811 ja .L078cbc_dec_loop6
1812 movaps %xmm7,%xmm2
1813 movaps %xmm0,%xmm7
1814 addl $80,%eax
1815 jle .L079cbc_dec_tail_collected
1816 movups %xmm2,(%edi)
1817 leal 16(%edi),%edi
# Tail: 1..5 (possibly partial) blocks remain.
1818 .L076cbc_dec_tail:
1819 movups (%esi),%xmm2
1820 movaps %xmm2,%xmm6
1821 cmpl $16,%eax
1822 jbe .L080cbc_dec_one
1823 movups 16(%esi),%xmm3
1824 movaps %xmm3,%xmm5
1825 cmpl $32,%eax
1826 jbe .L081cbc_dec_two
1827 movups 32(%esi),%xmm4
1828 cmpl $48,%eax
1829 jbe .L082cbc_dec_three
1830 movups 48(%esi),%xmm5
1831 cmpl $64,%eax
1832 jbe .L083cbc_dec_four
1833 movups 64(%esi),%xmm6
1834 movaps %xmm7,(%esp)
1835 movups (%esi),%xmm2
1836 xorps %xmm7,%xmm7
1837 call __aesni_decrypt6
1838 movups (%esi),%xmm1
1839 movups 16(%esi),%xmm0
1840 xorps (%esp),%xmm2
1841 xorps %xmm1,%xmm3
1842 movups 32(%esi),%xmm1
1843 xorps %xmm0,%xmm4
1844 movups 48(%esi),%xmm0
1845 xorps %xmm1,%xmm5
1846 movups 64(%esi),%xmm7
1847 xorps %xmm0,%xmm6
1848 movups %xmm2,(%edi)
1849 movups %xmm3,16(%edi)
1850 movups %xmm4,32(%edi)
1851 movups %xmm5,48(%edi)
1852 leal 64(%edi),%edi
1853 movaps %xmm6,%xmm2
1854 subl $80,%eax
1855 jmp .L079cbc_dec_tail_collected
1856 .align 16
1857 .L080cbc_dec_one:
1858 movups (%edx),%xmm0
1859 movups 16(%edx),%xmm1
1860 leal 32(%edx),%edx
1861 xorps %xmm0,%xmm2
1862 .L084dec1_loop_16:
1863 .byte 102,15,56,222,209
# ^ aesdec %xmm1,%xmm2
1864 decl %ecx
1865 movups (%edx),%xmm1
1866 leal 16(%edx),%edx
1867 jnz .L084dec1_loop_16
1868 .byte 102,15,56,223,209
# ^ aesdeclast %xmm1,%xmm2
1869 xorps %xmm7,%xmm2
1870 movaps %xmm6,%xmm7
1871 subl $16,%eax
1872 jmp .L079cbc_dec_tail_collected
1873 .align 16
1874 .L081cbc_dec_two:
1875 xorps %xmm4,%xmm4
1876 call __aesni_decrypt3
1877 xorps %xmm7,%xmm2
1878 xorps %xmm6,%xmm3
1879 movups %xmm2,(%edi)
1880 movaps %xmm3,%xmm2
1881 leal 16(%edi),%edi
1882 movaps %xmm5,%xmm7
1883 subl $32,%eax
1884 jmp .L079cbc_dec_tail_collected
1885 .align 16
1886 .L082cbc_dec_three:
1887 call __aesni_decrypt3
1888 xorps %xmm7,%xmm2
1889 xorps %xmm6,%xmm3
1890 xorps %xmm5,%xmm4
1891 movups %xmm2,(%edi)
1892 movaps %xmm4,%xmm2
1893 movups %xmm3,16(%edi)
1894 leal 32(%edi),%edi
1895 movups 32(%esi),%xmm7
1896 subl $48,%eax
1897 jmp .L079cbc_dec_tail_collected
1898 .align 16
1899 .L083cbc_dec_four:
1900 call __aesni_decrypt4
1901 movups 16(%esi),%xmm1
1902 movups 32(%esi),%xmm0
1903 xorps %xmm7,%xmm2
1904 movups 48(%esi),%xmm7
1905 xorps %xmm6,%xmm3
1906 movups %xmm2,(%edi)
1907 xorps %xmm1,%xmm4
1908 movups %xmm3,16(%edi)
1909 xorps %xmm0,%xmm5
1910 movups %xmm4,32(%edi)
1911 leal 48(%edi),%edi
1912 movaps %xmm5,%xmm2
1913 subl $64,%eax
# Last (possibly partial) decrypted block is in %xmm2.
1914 .L079cbc_dec_tail_collected:
1915 andl $15,%eax
1916 jnz .L085cbc_dec_tail_partial
1917 movups %xmm2,(%edi)
1918 jmp .L075cbc_ret
1919 .align 16
1920 .L085cbc_dec_tail_partial:
1921 movaps %xmm2,(%esp)
1922 movl $16,%ecx
1923 movl %esp,%esi
1924 subl %eax,%ecx
1925 .long 2767451785
# ^ bytes 89,F6,F3,A4 = mov %esi,%esi; rep movsb (copy partial block out)
1926 .L075cbc_ret:
1927 movl 16(%esp),%esp
1928 movl 36(%esp),%ebp
# Write the updated IV back for the caller.
1929 movups %xmm7,(%ebp)
1930 .L070cbc_abort:
1931 popl %edi
1932 popl %esi
1933 popl %ebx
1934 popl %ebp
1935 ret
#----------------------------------------------------------------------
# __aesni_set_encrypt_key — internal AES key-schedule expansion.
# In:  %eax = user key ptr, %ecx = key bits (128/192/256),
#      %edx = AES_KEY ptr (inferred from the register tests/loads below;
#      callers _aesni_set_encrypt_key/_aesni_set_decrypt_key load them
#      from their cdecl stack args).
# Out: %eax = 0 on success, -1 bad pointer, -2 bad key bits.
# Round keys are produced with AESKEYGENASSIST plus the shufps/xorps
# "fold" sequence in the key_* helpers; round count is stored in the
# key schedule (offset 240 = 80/48/16(%edx) relative to the advanced
# %edx pointer for 10/12/14 rounds respectively).
# NOTE(review): all `ret` lines (original lines 1979, 1991, 2016, 2036,
# 2082, 2094, 2105, 2109, 2113) were missing from this extract;
# restored below — without them each helper falls through.
#----------------------------------------------------------------------
1936 .def __aesni_set_encrypt_key; .scl 3; .type 32; .endef
1937 .align 16
1938 __aesni_set_encrypt_key:
1939 testl %eax,%eax
1940 jz .L086bad_pointer
1941 testl %edx,%edx
1942 jz .L086bad_pointer
1943 movups (%eax),%xmm0
1944 xorps %xmm4,%xmm4
1945 leal 16(%edx),%edx
1946 cmpl $256,%ecx
1947 je .L08714rounds
1948 cmpl $192,%ecx
1949 je .L08812rounds
1950 cmpl $128,%ecx
1951 jne .L089bad_keybits
# --- AES-128: 10 rounds, one aeskeygenassist per round constant ---
1952 .align 16
1953 .L09010rounds:
1954 movl $9,%ecx
1955 movups %xmm0,-16(%edx)
1956 .byte 102,15,58,223,200,1
# ^ aeskeygenassist $1,%xmm0,%xmm1
1957 call .L091key_128_cold
1958 .byte 102,15,58,223,200,2
1959 call .L092key_128
1960 .byte 102,15,58,223,200,4
1961 call .L092key_128
1962 .byte 102,15,58,223,200,8
1963 call .L092key_128
1964 .byte 102,15,58,223,200,16
1965 call .L092key_128
1966 .byte 102,15,58,223,200,32
1967 call .L092key_128
1968 .byte 102,15,58,223,200,64
1969 call .L092key_128
1970 .byte 102,15,58,223,200,128
1971 call .L092key_128
1972 .byte 102,15,58,223,200,27
1973 call .L092key_128
1974 .byte 102,15,58,223,200,54
1975 call .L092key_128
1976 movups %xmm0,(%edx)
1977 movl %ecx,80(%edx)
1978 xorl %eax,%eax
1979 ret
1980 .align 16
# key_128: store previous round key, then fold in the keygen result.
1981 .L092key_128:
1982 movups %xmm0,(%edx)
1983 leal 16(%edx),%edx
1984 .L091key_128_cold:
1985 shufps $16,%xmm0,%xmm4
1986 xorps %xmm4,%xmm0
1987 shufps $140,%xmm0,%xmm4
1988 xorps %xmm4,%xmm0
1989 shufps $255,%xmm1,%xmm1
1990 xorps %xmm1,%xmm0
1991 ret
# --- AES-192: 12 rounds, alternating 192a/192b expansion steps ---
1992 .align 16
1993 .L08812rounds:
1994 movq 16(%eax),%xmm2
1995 movl $11,%ecx
1996 movups %xmm0,-16(%edx)
1997 .byte 102,15,58,223,202,1
# ^ aeskeygenassist $1,%xmm2,%xmm1
1998 call .L093key_192a_cold
1999 .byte 102,15,58,223,202,2
2000 call .L094key_192b
2001 .byte 102,15,58,223,202,4
2002 call .L095key_192a
2003 .byte 102,15,58,223,202,8
2004 call .L094key_192b
2005 .byte 102,15,58,223,202,16
2006 call .L095key_192a
2007 .byte 102,15,58,223,202,32
2008 call .L094key_192b
2009 .byte 102,15,58,223,202,64
2010 call .L095key_192a
2011 .byte 102,15,58,223,202,128
2012 call .L094key_192b
2013 movups %xmm0,(%edx)
2014 movl %ecx,48(%edx)
2015 xorl %eax,%eax
2016 ret
2017 .align 16
2018 .L095key_192a:
2019 movups %xmm0,(%edx)
2020 leal 16(%edx),%edx
2021 .align 16
2022 .L093key_192a_cold:
2023 movaps %xmm2,%xmm5
2024 .L096key_192b_warm:
2025 shufps $16,%xmm0,%xmm4
2026 movdqa %xmm2,%xmm3
2027 xorps %xmm4,%xmm0
2028 shufps $140,%xmm0,%xmm4
2029 pslldq $4,%xmm3
2030 xorps %xmm4,%xmm0
2031 pshufd $85,%xmm1,%xmm1
2032 pxor %xmm3,%xmm2
2033 pxor %xmm1,%xmm0
2034 pshufd $255,%xmm0,%xmm3
2035 pxor %xmm3,%xmm2
2036 ret
2037 .align 16
2038 .L094key_192b:
2039 movaps %xmm0,%xmm3
2040 shufps $68,%xmm0,%xmm5
2041 movups %xmm5,(%edx)
2042 shufps $78,%xmm2,%xmm3
2043 movups %xmm3,16(%edx)
2044 leal 32(%edx),%edx
2045 jmp .L096key_192b_warm
# --- AES-256: 14 rounds, alternating 256a/256b expansion steps ---
2046 .align 16
2047 .L08714rounds:
2048 movups 16(%eax),%xmm2
2049 movl $13,%ecx
2050 leal 16(%edx),%edx
2051 movups %xmm0,-32(%edx)
2052 movups %xmm2,-16(%edx)
2053 .byte 102,15,58,223,202,1
# ^ aeskeygenassist $1,%xmm2,%xmm1
2054 call .L097key_256a_cold
2055 .byte 102,15,58,223,200,1
# ^ aeskeygenassist $1,%xmm0,%xmm1
2056 call .L098key_256b
2057 .byte 102,15,58,223,202,2
2058 call .L099key_256a
2059 .byte 102,15,58,223,200,2
2060 call .L098key_256b
2061 .byte 102,15,58,223,202,4
2062 call .L099key_256a
2063 .byte 102,15,58,223,200,4
2064 call .L098key_256b
2065 .byte 102,15,58,223,202,8
2066 call .L099key_256a
2067 .byte 102,15,58,223,200,8
2068 call .L098key_256b
2069 .byte 102,15,58,223,202,16
2070 call .L099key_256a
2071 .byte 102,15,58,223,200,16
2072 call .L098key_256b
2073 .byte 102,15,58,223,202,32
2074 call .L099key_256a
2075 .byte 102,15,58,223,200,32
2076 call .L098key_256b
2077 .byte 102,15,58,223,202,64
2078 call .L099key_256a
2079 movups %xmm0,(%edx)
2080 movl %ecx,16(%edx)
2081 xorl %eax,%eax
2082 ret
2083 .align 16
2084 .L099key_256a:
2085 movups %xmm2,(%edx)
2086 leal 16(%edx),%edx
2087 .L097key_256a_cold:
2088 shufps $16,%xmm0,%xmm4
2089 xorps %xmm4,%xmm0
2090 shufps $140,%xmm0,%xmm4
2091 xorps %xmm4,%xmm0
2092 shufps $255,%xmm1,%xmm1
2093 xorps %xmm1,%xmm0
2094 ret
2095 .align 16
2096 .L098key_256b:
2097 movups %xmm0,(%edx)
2098 leal 16(%edx),%edx
2099 shufps $16,%xmm2,%xmm4
2100 xorps %xmm4,%xmm2
2101 shufps $140,%xmm2,%xmm4
2102 xorps %xmm4,%xmm2
2103 shufps $170,%xmm1,%xmm1
2104 xorps %xmm1,%xmm2
2105 ret
# Error exits: %eax = -1 (null key/schedule ptr) or -2 (bad key size).
2106 .align 4
2107 .L086bad_pointer:
2108 movl $-1,%eax
2109 ret
2110 .align 4
2111 .L089bad_keybits:
2112 movl $-2,%eax
2113 ret
#----------------------------------------------------------------------
# _aesni_set_encrypt_key(userKey, bits, key) — cdecl wrapper: loads the
# three stack args into %eax/%ecx/%edx and tail-calls the internal
# expander; the result code comes back in %eax.
# NOTE(review): the `ret` (original line 2123) was missing from this
# extract; restored — without it control falls into
# _aesni_set_decrypt_key below.
#----------------------------------------------------------------------
2114 .globl _aesni_set_encrypt_key
2115 .def _aesni_set_encrypt_key; .scl 2; .type 32; .endef
2116 .align 16
2117 _aesni_set_encrypt_key:
2118 .L_aesni_set_encrypt_key_begin:
2119 movl 4(%esp),%eax
2120 movl 8(%esp),%ecx
2121 movl 12(%esp),%edx
2122 call __aesni_set_encrypt_key
2123 ret
#----------------------------------------------------------------------
# _aesni_set_decrypt_key(userKey, bits, key) — cdecl.  Builds the
# encrypt schedule, then converts it in place for the Equivalent
# Inverse Cipher: round keys are reversed end-for-end and every inner
# key passes through AESIMC.  Returns the expander's code in %eax.
# NOTE(review): the `ret` (original line 2160) was missing from this
# extract; restored after .L100dec_key_ret.
#----------------------------------------------------------------------
2124 .globl _aesni_set_decrypt_key
2125 .def _aesni_set_decrypt_key; .scl 2; .type 32; .endef
2126 .align 16
2127 _aesni_set_decrypt_key:
2128 .L_aesni_set_decrypt_key_begin:
2129 movl 4(%esp),%eax
2130 movl 8(%esp),%ecx
2131 movl 12(%esp),%edx
2132 call __aesni_set_encrypt_key
2133 movl 12(%esp),%edx
2134 shll $4,%ecx
2135 testl %eax,%eax
2136 jnz .L100dec_key_ret
# %eax -> last round key (rounds*16 past %edx); swap first/last as-is.
2137 leal 16(%edx,%ecx,1),%eax
2138 movups (%edx),%xmm0
2139 movups (%eax),%xmm1
2140 movups %xmm0,(%eax)
2141 movups %xmm1,(%edx)
2142 leal 16(%edx),%edx
2143 leal -16(%eax),%eax
# Walk inward from both ends: InvMixColumns each key, then swap.
2144 .L101dec_key_inverse:
2145 movups (%edx),%xmm0
2146 movups (%eax),%xmm1
2147 .byte 102,15,56,219,192
# ^ aesimc %xmm0,%xmm0
2148 .byte 102,15,56,219,201
# ^ aesimc %xmm1,%xmm1
2149 leal 16(%edx),%edx
2150 leal -16(%eax),%eax
2151 movups %xmm0,16(%eax)
2152 movups %xmm1,-16(%edx)
2153 cmpl %edx,%eax
2154 ja .L101dec_key_inverse
# Middle key (odd count) gets AESIMC in place.
2155 movups (%edx),%xmm0
2156 .byte 102,15,56,219,192
# ^ aesimc %xmm0,%xmm0
2157 movups %xmm0,(%edx)
2158 xorl %eax,%eax
2159 .L100dec_key_ret:
2160 ret
# ASCII identification string:
# "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>\0"
2161 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
2162 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
2163 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
2164 .byte 115,108,46,111,114,103,62,0