# lib/accelerated/x86/coff/appro-aes-gcm-x86-64-coff.s
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain copyright notices,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# * Neither the name of the Andy Polyakov nor the names of its
# copyright holder and contributors may be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *** This file is auto-generated ***
#
.text
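# gcm_gmult_4bit: one GHASH multiplication of the 16-byte hash value by the
# key, using the 4-bit table method and the .Lrem_4bit reduction table.
# Win64 calling convention: %rcx = hash value (Xi), %rdx = key table (Htable,
# OpenSSL's names).  %rdi/%rsi are non-volatile on Win64, so they are saved
# to the caller's shadow space here and restored in the epilogue.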
.globl gcm_gmult_4bit
.def gcm_gmult_4bit; .scl 2; .type 32; .endef
.p2align 4
gcm_gmult_4bit:
	movq %rdi,8(%rsp)
	movq %rsi,16(%rsp)
	movq %rsp,%rax
.LSEH_begin_gcm_gmult_4bit:
	movq %rcx,%rdi
	movq %rdx,%rsi

	pushq %rbx
	pushq %rbp
	pushq %r12
.Lgmult_prologue:

	movzbq 15(%rdi),%r8
	leaq .Lrem_4bit(%rip),%r11
	xorq %rax,%rax
	xorq %rbx,%rbx
	movb %r8b,%al
	movb %r8b,%bl
	shlb $4,%al
	movq $14,%rcx
	movq 8(%rsi,%rax,1),%r8
	movq (%rsi,%rax,1),%r9
	andb $240,%bl
	movq %r8,%rdx
	jmp .Loop1

.p2align 4
.Loop1:
	shrq $4,%r8
	andq $15,%rdx
	movq %r9,%r10
	movb (%rdi,%rcx,1),%al
	shrq $4,%r9
	xorq 8(%rsi,%rbx,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rbx,1),%r9
	movb %al,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	shlb $4,%al
	xorq %r10,%r8
	decq %rcx
	js .Lbreak1

	shrq $4,%r8
	andq $15,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rax,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rax,1),%r9
	andb $240,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	xorq %r10,%r8
	jmp .Loop1

.p2align 4
.Lbreak1:
	shrq $4,%r8
	andq $15,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rax,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rax,1),%r9
	andb $240,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	xorq %r10,%r8

	shrq $4,%r8
	andq $15,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rbx,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rbx,1),%r9
	xorq %r10,%r8
	xorq (%r11,%rdx,8),%r9

	bswapq %r8
	bswapq %r9
	movq %r8,8(%rdi)
	movq %r9,(%rdi)

	movq 16(%rsp),%rbx
	leaq 24(%rsp),%rsp
.Lgmult_epilogue:
	movq 8(%rsp),%rdi
	movq 16(%rsp),%rsi
	.byte 0xf3,0xc3
.LSEH_end_gcm_gmult_4bit:
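# gcm_ghash_4bit: GHASH over a whole buffer, in 16-byte blocks.  Win64
# calling convention: %rcx = hash value (Xi), %rdx = key table (Htable),
# %r8 = input, %r9 = length in bytes.  The prologue copies the key table
# to the stack in a pre-shifted form; .Louter_loop then absorbs one block
# per iteration, using the .Lrem_8bit reduction table.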
.globl gcm_ghash_4bit
.def gcm_ghash_4bit; .scl 2; .type 32; .endef
.p2align 4
gcm_ghash_4bit:
	movq %rdi,8(%rsp)
	movq %rsi,16(%rsp)
	movq %rsp,%rax
.LSEH_begin_gcm_ghash_4bit:
	movq %rcx,%rdi
	movq %rdx,%rsi
	movq %r8,%rdx
	movq %r9,%rcx

	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	subq $280,%rsp
.Lghash_prologue:
	movq %rdx,%r14
	movq %rcx,%r15
	subq $-128,%rsi
	leaq 16+128(%rsp),%rbp
	xorl %edx,%edx
	movq 0+0-128(%rsi),%r8
	movq 0+8-128(%rsi),%rax
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq 16+0-128(%rsi),%r9
	shlb $4,%dl
	movq 16+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,0(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,0(%rbp)
	movq 32+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,0-128(%rbp)
	movq 32+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,1(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,8(%rbp)
	movq 48+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,8-128(%rbp)
	movq 48+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,2(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,16(%rbp)
	movq 64+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,16-128(%rbp)
	movq 64+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,3(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,24(%rbp)
	movq 80+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,24-128(%rbp)
	movq 80+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,4(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,32(%rbp)
	movq 96+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,32-128(%rbp)
	movq 96+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,5(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,40(%rbp)
	movq 112+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,40-128(%rbp)
	movq 112+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,6(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,48(%rbp)
	movq 128+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,48-128(%rbp)
	movq 128+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,7(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,56(%rbp)
	movq 144+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,56-128(%rbp)
	movq 144+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,8(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,64(%rbp)
	movq 160+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,64-128(%rbp)
	movq 160+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,9(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,72(%rbp)
	movq 176+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,72-128(%rbp)
	movq 176+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,10(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,80(%rbp)
	movq 192+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,80-128(%rbp)
	movq 192+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,11(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,88(%rbp)
	movq 208+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,88-128(%rbp)
	movq 208+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,12(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,96(%rbp)
	movq 224+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,96-128(%rbp)
	movq 224+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,13(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,104(%rbp)
	movq 240+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,104-128(%rbp)
	movq 240+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,14(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,112(%rbp)
	shlb $4,%dl
	movq %rax,112-128(%rbp)
	shlq $60,%r10
	movb %dl,15(%rsp)
	orq %r10,%rbx
	movq %r9,120(%rbp)
	movq %rbx,120-128(%rbp)
	addq $-128,%rsi
	movq 8(%rdi),%r8
	movq 0(%rdi),%r9
	addq %r14,%r15
	leaq .Lrem_8bit(%rip),%r11
	jmp .Louter_loop
.p2align 4
.Louter_loop:
	xorq (%r14),%r9
	movq 8(%r14),%rdx
	leaq 16(%r14),%r14
	xorq %r8,%rdx
	movq %r9,(%rdi)
	movq %rdx,8(%rdi)
	shrq $32,%rdx
	xorq %rax,%rax
	roll $8,%edx
	movb %dl,%al
	movzbl %dl,%ebx
	shlb $4,%al
	shrl $4,%ebx
	roll $8,%edx
	movq 8(%rsi,%rax,1),%r8
	movq (%rsi,%rax,1),%r9
	movb %dl,%al
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	xorq %r8,%r12
	movq %r9,%r10
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 8(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 4(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 0(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	andl $240,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl -4(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	movzwq (%r11,%r12,2),%r12
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	shlq $48,%r12
	xorq %r10,%r8
	xorq %r12,%r9
	movzbq %r8b,%r13
	shrq $4,%r8
	movq %r9,%r10
	shlb $4,%r13b
	shrq $4,%r9
	xorq 8(%rsi,%rcx,1),%r8
	movzwq (%r11,%r13,2),%r13
	shlq $60,%r10
	xorq (%rsi,%rcx,1),%r9
	xorq %r10,%r8
	shlq $48,%r13
	bswapq %r8
	xorq %r13,%r9
	bswapq %r9
	cmpq %r15,%r14
	jb .Louter_loop
	movq %r8,8(%rdi)
	movq %r9,(%rdi)

	leaq 280(%rsp),%rsi
	movq 0(%rsi),%r15
	movq 8(%rsi),%r14
	movq 16(%rsi),%r13
	movq 24(%rsi),%r12
	movq 32(%rsi),%rbp
	movq 40(%rsi),%rbx
	leaq 48(%rsi),%rsp
.Lghash_epilogue:
	movq 8(%rsp),%rdi
	movq 16(%rsp),%rsi
	.byte 0xf3,0xc3
.LSEH_end_gcm_ghash_4bit:
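# gcm_init_clmul: PCLMULQDQ-based key setup.  Win64 calling convention:
# %rcx = key table (Htable), %rdx = hash key H.  The routine converts H into
# the form used by the carry-less-multiply routines and stores H and H^2 at
# Htable[0] and Htable[1].  The .byte 102,15,58,68,... sequences below are
# pclmulqdq instructions emitted as raw opcodes for older assemblers.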
.globl gcm_init_clmul
.def gcm_init_clmul; .scl 2; .type 32; .endef
.p2align 4
gcm_init_clmul:
	movdqu (%rdx),%xmm2
	pshufd $78,%xmm2,%xmm2

	pshufd $255,%xmm2,%xmm4
	movdqa %xmm2,%xmm3
	psllq $1,%xmm2
	pxor %xmm5,%xmm5
	psrlq $63,%xmm3
	pcmpgtd %xmm4,%xmm5
	pslldq $8,%xmm3
	por %xmm3,%xmm2

	pand .L0x1c2_polynomial(%rip),%xmm5
	pxor %xmm5,%xmm2

	movdqa %xmm2,%xmm0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm2,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm2,%xmm4
	.byte 102,15,58,68,194,0
	.byte 102,15,58,68,202,17
	.byte 102,15,58,68,220,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $5,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm4
	pslldq $8,%xmm0
	psrldq $8,%xmm4
	pxor %xmm3,%xmm0
	pxor %xmm4,%xmm1

	movdqa %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
	pxor %xmm1,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
	movdqu %xmm2,(%rcx)
	movdqu %xmm0,16(%rcx)
	.byte 0xf3,0xc3
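# gcm_gmult_clmul: one GHASH multiplication using PCLMULQDQ (a Karatsuba
# multiply followed by reduction modulo the GCM polynomial).  Win64 calling
# convention: %rcx = hash value (Xi), %rdx = key table (Htable).  The
# .byte 102,15,56,0,... sequences are pshufb (byte swap via .Lbswap_mask);
# .byte 102,15,58,68,... are pclmulqdq.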
.globl gcm_gmult_clmul
.def gcm_gmult_clmul; .scl 2; .type 32; .endef
.p2align 4
gcm_gmult_clmul:
	movdqu (%rcx),%xmm0
	movdqa .Lbswap_mask(%rip),%xmm5
	movdqu (%rdx),%xmm2
	.byte 102,15,56,0,197
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm2,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm2,%xmm4
	.byte 102,15,58,68,194,0
	.byte 102,15,58,68,202,17
	.byte 102,15,58,68,220,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $5,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm4
	pslldq $8,%xmm0
	psrldq $8,%xmm4
	pxor %xmm3,%xmm0
	pxor %xmm4,%xmm1

	movdqa %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
	pxor %xmm1,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
	.byte 102,15,56,0,197
	movdqu %xmm0,(%rcx)
	.byte 0xf3,0xc3
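# gcm_ghash_clmul: PCLMULQDQ GHASH over a whole buffer.  Win64 calling
# convention: %rcx = hash value (Xi), %rdx = key table (Htable), %r8 = input,
# %r9 = length.  The raw-opcode prologue allocates stack and saves
# %xmm6-%xmm10 (non-volatile on Win64).  .Lmod_loop processes two blocks per
# iteration using H and H^2, .Leven_tail/.Lodd_tail handle the remainder, and
# .Ldone restores the xmm registers.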
.globl gcm_ghash_clmul
.def gcm_ghash_clmul; .scl 2; .type 32; .endef
.p2align 4
gcm_ghash_clmul:
.LSEH_begin_gcm_ghash_clmul:

	.byte 0x48,0x83,0xec,0x58
	.byte 0x0f,0x29,0x34,0x24
	.byte 0x0f,0x29,0x7c,0x24,0x10
	.byte 0x44,0x0f,0x29,0x44,0x24,0x20
	.byte 0x44,0x0f,0x29,0x4c,0x24,0x30
	.byte 0x44,0x0f,0x29,0x54,0x24,0x40
	movdqa .Lbswap_mask(%rip),%xmm5

	movdqu (%rcx),%xmm0
	movdqu (%rdx),%xmm2
	.byte 102,15,56,0,197

	subq $16,%r9
	jz .Lodd_tail

	movdqu 16(%rdx),%xmm8

	movdqu (%r8),%xmm3
	movdqu 16(%r8),%xmm6
	.byte 102,15,56,0,221
	.byte 102,15,56,0,245
	pxor %xmm3,%xmm0
	movdqa %xmm6,%xmm7
	pshufd $78,%xmm6,%xmm3
	pshufd $78,%xmm2,%xmm4
	pxor %xmm6,%xmm3
	pxor %xmm2,%xmm4
	.byte 102,15,58,68,242,0
	.byte 102,15,58,68,250,17
	.byte 102,15,58,68,220,0
	pxor %xmm6,%xmm3
	pxor %xmm7,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm7
	pxor %xmm4,%xmm6
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm8,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm8,%xmm4

	leaq 32(%r8),%r8
	subq $32,%r9
	jbe .Leven_tail

.Lmod_loop:
	.byte 102,65,15,58,68,192,0
	.byte 102,65,15,58,68,200,17
	.byte 102,15,58,68,220,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0
	movdqu (%r8),%xmm3
	pxor %xmm6,%xmm0
	pxor %xmm7,%xmm1

	movdqu 16(%r8),%xmm6
	.byte 102,15,56,0,221
	.byte 102,15,56,0,245

	movdqa %xmm6,%xmm7
	pshufd $78,%xmm6,%xmm9
	pshufd $78,%xmm2,%xmm10
	pxor %xmm6,%xmm9
	pxor %xmm2,%xmm10
	pxor %xmm3,%xmm1

	movdqa %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $5,%xmm0
	pxor %xmm3,%xmm0
	.byte 102,15,58,68,242,0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm4
	pslldq $8,%xmm0
	psrldq $8,%xmm4
	pxor %xmm3,%xmm0
	pxor %xmm4,%xmm1

	.byte 102,15,58,68,250,17
	movdqa %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
	pxor %xmm1,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0

	.byte 102,69,15,58,68,202,0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm8,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm8,%xmm4

	pxor %xmm6,%xmm9
	pxor %xmm7,%xmm9
	movdqa %xmm9,%xmm10
	psrldq $8,%xmm9
	pslldq $8,%xmm10
	pxor %xmm9,%xmm7
	pxor %xmm10,%xmm6

	leaq 32(%r8),%r8
	subq $32,%r9
	ja .Lmod_loop

.Leven_tail:
	.byte 102,65,15,58,68,192,0
	.byte 102,65,15,58,68,200,17
	.byte 102,15,58,68,220,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0
	pxor %xmm6,%xmm0
	pxor %xmm7,%xmm1

	movdqa %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $5,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm4
	pslldq $8,%xmm0
	psrldq $8,%xmm4
	pxor %xmm3,%xmm0
	pxor %xmm4,%xmm1

	movdqa %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
	pxor %xmm1,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
	testq %r9,%r9
	jnz .Ldone

.Lodd_tail:
	movdqu (%r8),%xmm3
	.byte 102,15,56,0,221
	pxor %xmm3,%xmm0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm2,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm2,%xmm4
	.byte 102,15,58,68,194,0
	.byte 102,15,58,68,202,17
	.byte 102,15,58,68,220,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $5,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm4
	pslldq $8,%xmm0
	psrldq $8,%xmm4
	pxor %xmm3,%xmm0
	pxor %xmm4,%xmm1

	movdqa %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
	pxor %xmm1,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm0
.Ldone:
	.byte 102,15,56,0,197
	movdqu %xmm0,(%rcx)
	movaps (%rsp),%xmm6
	movaps 16(%rsp),%xmm7
	movaps 32(%rsp),%xmm8
	movaps 48(%rsp),%xmm9
	movaps 64(%rsp),%xmm10
	addq $88,%rsp
	.byte 0xf3,0xc3
.LSEH_end_gcm_ghash_clmul:
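# Constant data: the byte-swap mask and GCM polynomial used by the CLMUL
# code, the .Lrem_4bit/.Lrem_8bit reduction tables used by the table-driven
# routines, and the CRYPTOGAMS identification string.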
.p2align 6
.Lbswap_mask:
	.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
	.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.p2align 6

.Lrem_4bit:
	.long 0,0,0,471859200,0,943718400,0,610271232
	.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
	.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
	.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160

.Lrem_8bit:
	.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
	.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
	.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
	.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
	.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
	.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
	.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
	.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
	.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
	.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
	.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
	.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
	.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
	.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
	.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
	.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
	.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
	.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
	.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
	.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
	.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
	.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
	.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
	.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
	.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
	.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
	.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
	.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
	.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
	.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
	.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
	.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

	.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
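# se_handler: Win64 structured exception handler shared by the two 4-bit
# routines.  For a fault between the recorded prologue and epilogue labels it
# writes the saved %rbx/%rbp/%r12 and the unwound stack pointer back into the
# CONTEXT record, copies the dispatcher context (the .long 0xa548f3fc encodes
# "cld; rep movsq"), and calls RtlVirtualUnwind to continue unwinding.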
.def se_handler; .scl 3; .type 32; .endef
.p2align 4
se_handler:
	pushq %rsi
	pushq %rdi
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushfq
	subq $64,%rsp

	movq 120(%r8),%rax
	movq 248(%r8),%rbx

	movq 8(%r9),%rsi
	movq 56(%r9),%r11

	movl 0(%r11),%r10d
	leaq (%rsi,%r10,1),%r10
	cmpq %r10,%rbx
	jb .Lin_prologue

	movq 152(%r8),%rax

	movl 4(%r11),%r10d
	leaq (%rsi,%r10,1),%r10
	cmpq %r10,%rbx
	jae .Lin_prologue

	leaq 24(%rax),%rax

	movq -8(%rax),%rbx
	movq -16(%rax),%rbp
	movq -24(%rax),%r12
	movq %rbx,144(%r8)
	movq %rbp,160(%r8)
	movq %r12,216(%r8)

.Lin_prologue:
	movq 8(%rax),%rdi
	movq 16(%rax),%rsi
	movq %rax,152(%r8)
	movq %rsi,168(%r8)
	movq %rdi,176(%r8)

	movq 40(%r9),%rdi
	movq %r8,%rsi
	movl $154,%ecx
	.long 0xa548f3fc

	movq %r9,%rsi
	xorq %rcx,%rcx
	movq 8(%rsi),%rdx
	movq 0(%rsi),%r8
	movq 16(%rsi),%r9
	movq 40(%rsi),%r10
	leaq 56(%rsi),%r11
	leaq 24(%rsi),%r12
	movq %r10,32(%rsp)
	movq %r11,40(%rsp)
	movq %r12,48(%rsp)
	movq %rcx,56(%rsp)
	call *__imp_RtlVirtualUnwind(%rip)

	movl $1,%eax
	addq $64,%rsp
	popfq
	popq %r15
	popq %r14
	popq %r13
	popq %r12
	popq %rbp
	popq %rbx
	popq %rdi
	popq %rsi
	.byte 0xf3,0xc3
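# SEH tables: .pdata maps each function's code range to its unwind
# information in .xdata.  The two 4-bit entries point at se_handler with
# their prologue/epilogue labels as handler data; the gcm_ghash_clmul entry
# instead carries packed unwind codes describing its stack allocation and
# xmm register saves.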
.section .pdata
.p2align 2
	.rva .LSEH_begin_gcm_gmult_4bit
	.rva .LSEH_end_gcm_gmult_4bit
	.rva .LSEH_info_gcm_gmult_4bit

	.rva .LSEH_begin_gcm_ghash_4bit
	.rva .LSEH_end_gcm_ghash_4bit
	.rva .LSEH_info_gcm_ghash_4bit

	.rva .LSEH_begin_gcm_ghash_clmul
	.rva .LSEH_end_gcm_ghash_clmul
	.rva .LSEH_info_gcm_ghash_clmul

.section .xdata
.p2align 3
.LSEH_info_gcm_gmult_4bit:
	.byte 9,0,0,0
	.rva se_handler
	.rva .Lgmult_prologue,.Lgmult_epilogue
.LSEH_info_gcm_ghash_4bit:
	.byte 9,0,0,0
	.rva se_handler
	.rva .Lghash_prologue,.Lghash_epilogue
.LSEH_info_gcm_ghash_clmul:
	.byte 0x01,0x1f,0x0b,0x00
	.byte 0x1f,0xa8,0x04,0x00
	.byte 0x19,0x98,0x03,0x00
	.byte 0x13,0x88,0x02,0x00
	.byte 0x0d,0x78,0x01,0x00
	.byte 0x08,0x68,0x00,0x00
	.byte 0x04,0xa2,0x00,0x00