# [gnutls.git] lib/accelerated/x86/elf/appro-aes-gcm-x86-64.s
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#     * Redistributions of source code must retain copyright notices,
#       this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials
#       provided with the distribution.
#
#     * Neither the name of the Andy Polyakov nor the names of its
#       copyright holder and contributors may be used to endorse or
#       promote products derived from this software without specific
#       prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *** This file is auto-generated ***
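#
# Annotation (editorial, not part of the generated output; the
# instructions themselves are untouched): this module implements GHASH,
# the GF(2^128) universal hash of AES-GCM, in two flavours:
#
#     gcm_gmult_4bit / gcm_ghash_4bit        - portable 4-bit table-driven code
#     gcm_init_clmul / gcm_gmult_clmul /
#     gcm_ghash_clmul                        - PCLMULQDQ (carry-less multiply) code
#
# It is apparently produced by Andy Polyakov's CRYPTOGAMS ghash-x86_64.pl
# generator (see the trailing version string at the end of the file).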
.text
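# void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) -- prototype as in
# OpenSSL's GCM glue code. Per the SysV AMD64 ABI, %rdi = Xi (the running
# 128-bit hash value) and %rsi = Htable (precomputed nibble multiples of
# the hash key H). Computes Xi = Xi * H in GF(2^128).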
.globl gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
        pushq %rbx
        pushq %rbp
        pushq %r12
.Lgmult_prologue:
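# Walk Xi from byte 15 down to byte 0, low nibble then high nibble. Each
# step shifts the 128-bit accumulator in %r9:%r8 right by four bits, XORs
# in the Htable entry selected by the nibble, and folds the bits shifted
# out through the .Lrem_4bit reduction constants (indexed via %rdx).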
        movzbq 15(%rdi),%r8
        leaq .Lrem_4bit(%rip),%r11
        xorq %rax,%rax
        xorq %rbx,%rbx
        movb %r8b,%al
        movb %r8b,%bl
        shlb $4,%al
        movq $14,%rcx
        movq 8(%rsi,%rax,1),%r8
        movq (%rsi,%rax,1),%r9
        andb $240,%bl
        movq %r8,%rdx
        jmp .Loop1

.align 16
.Loop1:
        shrq $4,%r8
        andq $15,%rdx
        movq %r9,%r10
        movb (%rdi,%rcx,1),%al
        shrq $4,%r9
        xorq 8(%rsi,%rbx,1),%r8
        shlq $60,%r10
        xorq (%rsi,%rbx,1),%r9
        movb %al,%bl
        xorq (%r11,%rdx,8),%r9
        movq %r8,%rdx
        shlb $4,%al
        xorq %r10,%r8
        decq %rcx
        js .Lbreak1

        shrq $4,%r8
        andq $15,%rdx
        movq %r9,%r10
        shrq $4,%r9
        xorq 8(%rsi,%rax,1),%r8
        shlq $60,%r10
        xorq (%rsi,%rax,1),%r9
        andb $240,%bl
        xorq (%r11,%rdx,8),%r9
        movq %r8,%rdx
        xorq %r10,%r8
        jmp .Loop1

.align 16
.Lbreak1:
        shrq $4,%r8
        andq $15,%rdx
        movq %r9,%r10
        shrq $4,%r9
        xorq 8(%rsi,%rax,1),%r8
        shlq $60,%r10
        xorq (%rsi,%rax,1),%r9
        andb $240,%bl
        xorq (%r11,%rdx,8),%r9
        movq %r8,%rdx
        xorq %r10,%r8

        shrq $4,%r8
        andq $15,%rdx
        movq %r9,%r10
        shrq $4,%r9
        xorq 8(%rsi,%rbx,1),%r8
        shlq $60,%r10
        xorq (%rsi,%rbx,1),%r9
        xorq %r10,%r8
        xorq (%r11,%rdx,8),%r9

        bswapq %r8
        bswapq %r9
        movq %r8,8(%rdi)
        movq %r9,(%rdi)

        movq 16(%rsp),%rbx
        leaq 24(%rsp),%rsp
.Lgmult_epilogue:
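# ".byte 0xf3,0xc3" encodes "repz ret", used throughout this file in
# place of a plain ret (a return form that is kinder to the branch
# predictors of older AMD CPUs).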
        .byte 0xf3,0xc3
.size gcm_gmult_4bit,.-gcm_gmult_4bit
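# void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
# size_t len) -- %rdi = Xi, %rsi = Htable, %rdx = input, %rcx = length
# (a multiple of 16). Hashes each 16-byte block into Xi. The prologue
# below appears to spread Htable across 280 bytes of stack: a copy of
# every entry shifted right by four bits (around %rbp) plus the
# shifted-out nibbles (at %rsp), letting the main loop consume input
# eight bits at a time against the .Lrem_8bit table.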
.globl gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
        pushq %rbx
        pushq %rbp
        pushq %r12
        pushq %r13
        pushq %r14
        pushq %r15
        subq $280,%rsp
.Lghash_prologue:
        movq %rdx,%r14
        movq %rcx,%r15
        subq $-128,%rsi
        leaq 16+128(%rsp),%rbp
        xorl %edx,%edx
        movq 0+0-128(%rsi),%r8
        movq 0+8-128(%rsi),%rax
        movb %al,%dl
        shrq $4,%rax
        movq %r8,%r10
        shrq $4,%r8
        movq 16+0-128(%rsi),%r9
        shlb $4,%dl
        movq 16+8-128(%rsi),%rbx
        shlq $60,%r10
        movb %dl,0(%rsp)
        orq %r10,%rax
        movb %bl,%dl
        shrq $4,%rbx
        movq %r9,%r10
        shrq $4,%r9
        movq %r8,0(%rbp)
        movq 32+0-128(%rsi),%r8
        shlb $4,%dl
        movq %rax,0-128(%rbp)
        movq 32+8-128(%rsi),%rax
        shlq $60,%r10
        movb %dl,1(%rsp)
        orq %r10,%rbx
        movb %al,%dl
        shrq $4,%rax
        movq %r8,%r10
        shrq $4,%r8
        movq %r9,8(%rbp)
        movq 48+0-128(%rsi),%r9
        shlb $4,%dl
        movq %rbx,8-128(%rbp)
        movq 48+8-128(%rsi),%rbx
        shlq $60,%r10
        movb %dl,2(%rsp)
        orq %r10,%rax
        movb %bl,%dl
        shrq $4,%rbx
        movq %r9,%r10
        shrq $4,%r9
        movq %r8,16(%rbp)
        movq 64+0-128(%rsi),%r8
        shlb $4,%dl
        movq %rax,16-128(%rbp)
        movq 64+8-128(%rsi),%rax
        shlq $60,%r10
        movb %dl,3(%rsp)
        orq %r10,%rbx
        movb %al,%dl
        shrq $4,%rax
        movq %r8,%r10
        shrq $4,%r8
        movq %r9,24(%rbp)
        movq 80+0-128(%rsi),%r9
        shlb $4,%dl
        movq %rbx,24-128(%rbp)
        movq 80+8-128(%rsi),%rbx
        shlq $60,%r10
        movb %dl,4(%rsp)
        orq %r10,%rax
        movb %bl,%dl
        shrq $4,%rbx
        movq %r9,%r10
        shrq $4,%r9
        movq %r8,32(%rbp)
        movq 96+0-128(%rsi),%r8
        shlb $4,%dl
        movq %rax,32-128(%rbp)
        movq 96+8-128(%rsi),%rax
        shlq $60,%r10
        movb %dl,5(%rsp)
        orq %r10,%rbx
        movb %al,%dl
        shrq $4,%rax
        movq %r8,%r10
        shrq $4,%r8
        movq %r9,40(%rbp)
        movq 112+0-128(%rsi),%r9
        shlb $4,%dl
        movq %rbx,40-128(%rbp)
        movq 112+8-128(%rsi),%rbx
        shlq $60,%r10
        movb %dl,6(%rsp)
        orq %r10,%rax
        movb %bl,%dl
        shrq $4,%rbx
        movq %r9,%r10
        shrq $4,%r9
        movq %r8,48(%rbp)
        movq 128+0-128(%rsi),%r8
        shlb $4,%dl
        movq %rax,48-128(%rbp)
        movq 128+8-128(%rsi),%rax
        shlq $60,%r10
        movb %dl,7(%rsp)
        orq %r10,%rbx
        movb %al,%dl
        shrq $4,%rax
        movq %r8,%r10
        shrq $4,%r8
        movq %r9,56(%rbp)
        movq 144+0-128(%rsi),%r9
        shlb $4,%dl
        movq %rbx,56-128(%rbp)
        movq 144+8-128(%rsi),%rbx
        shlq $60,%r10
        movb %dl,8(%rsp)
        orq %r10,%rax
        movb %bl,%dl
        shrq $4,%rbx
        movq %r9,%r10
        shrq $4,%r9
        movq %r8,64(%rbp)
        movq 160+0-128(%rsi),%r8
        shlb $4,%dl
        movq %rax,64-128(%rbp)
        movq 160+8-128(%rsi),%rax
        shlq $60,%r10
        movb %dl,9(%rsp)
        orq %r10,%rbx
        movb %al,%dl
        shrq $4,%rax
        movq %r8,%r10
        shrq $4,%r8
        movq %r9,72(%rbp)
        movq 176+0-128(%rsi),%r9
        shlb $4,%dl
        movq %rbx,72-128(%rbp)
        movq 176+8-128(%rsi),%rbx
        shlq $60,%r10
        movb %dl,10(%rsp)
        orq %r10,%rax
        movb %bl,%dl
        shrq $4,%rbx
        movq %r9,%r10
        shrq $4,%r9
        movq %r8,80(%rbp)
        movq 192+0-128(%rsi),%r8
        shlb $4,%dl
        movq %rax,80-128(%rbp)
        movq 192+8-128(%rsi),%rax
        shlq $60,%r10
        movb %dl,11(%rsp)
        orq %r10,%rbx
        movb %al,%dl
        shrq $4,%rax
        movq %r8,%r10
        shrq $4,%r8
        movq %r9,88(%rbp)
        movq 208+0-128(%rsi),%r9
        shlb $4,%dl
        movq %rbx,88-128(%rbp)
        movq 208+8-128(%rsi),%rbx
        shlq $60,%r10
        movb %dl,12(%rsp)
        orq %r10,%rax
        movb %bl,%dl
        shrq $4,%rbx
        movq %r9,%r10
        shrq $4,%r9
        movq %r8,96(%rbp)
        movq 224+0-128(%rsi),%r8
        shlb $4,%dl
        movq %rax,96-128(%rbp)
        movq 224+8-128(%rsi),%rax
        shlq $60,%r10
        movb %dl,13(%rsp)
        orq %r10,%rbx
        movb %al,%dl
        shrq $4,%rax
        movq %r8,%r10
        shrq $4,%r8
        movq %r9,104(%rbp)
        movq 240+0-128(%rsi),%r9
        shlb $4,%dl
        movq %rbx,104-128(%rbp)
        movq 240+8-128(%rsi),%rbx
        shlq $60,%r10
        movb %dl,14(%rsp)
        orq %r10,%rax
        movb %bl,%dl
        shrq $4,%rbx
        movq %r9,%r10
        shrq $4,%r9
        movq %r8,112(%rbp)
        shlb $4,%dl
        movq %rax,112-128(%rbp)
        shlq $60,%r10
        movb %dl,15(%rsp)
        orq %r10,%rbx
        movq %r9,120(%rbp)
        movq %rbx,120-128(%rbp)
        addq $-128,%rsi
        movq 8(%rdi),%r8
        movq 0(%rdi),%r9
        addq %r14,%r15
        leaq .Lrem_8bit(%rip),%r11
        jmp .Louter_loop
.align 16
.Louter_loop:
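# One iteration per 16-byte input block: XOR the block into Xi, then run
# sixteen unrolled 8-bit lookup/shift/fold steps over the on-stack tables
# and .Lrem_8bit, and store the byte-swapped result back into Xi.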
        xorq (%r14),%r9
        movq 8(%r14),%rdx
        leaq 16(%r14),%r14
        xorq %r8,%rdx
        movq %r9,(%rdi)
        movq %rdx,8(%rdi)
        shrq $32,%rdx
        xorq %rax,%rax
        roll $8,%edx
        movb %dl,%al
        movzbl %dl,%ebx
        shlb $4,%al
        shrl $4,%ebx
        roll $8,%edx
        movq 8(%rsi,%rax,1),%r8
        movq (%rsi,%rax,1),%r9
        movb %dl,%al
        movzbl %dl,%ecx
        shlb $4,%al
        movzbq (%rsp,%rbx,1),%r12
        shrl $4,%ecx
        xorq %r8,%r12
        movq %r9,%r10
        shrq $8,%r8
        movzbq %r12b,%r12
        shrq $8,%r9
        xorq -128(%rbp,%rbx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rbx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r12,2),%r12
        movzbl %dl,%ebx
        shlb $4,%al
        movzbq (%rsp,%rcx,1),%r13
        shrl $4,%ebx
        shlq $48,%r12
        xorq %r8,%r13
        movq %r9,%r10
        xorq %r12,%r9
        shrq $8,%r8
        movzbq %r13b,%r13
        shrq $8,%r9
        xorq -128(%rbp,%rcx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rcx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r13,2),%r13
        movzbl %dl,%ecx
        shlb $4,%al
        movzbq (%rsp,%rbx,1),%r12
        shrl $4,%ecx
        shlq $48,%r13
        xorq %r8,%r12
        movq %r9,%r10
        xorq %r13,%r9
        shrq $8,%r8
        movzbq %r12b,%r12
        movl 8(%rdi),%edx
        shrq $8,%r9
        xorq -128(%rbp,%rbx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rbx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r12,2),%r12
        movzbl %dl,%ebx
        shlb $4,%al
        movzbq (%rsp,%rcx,1),%r13
        shrl $4,%ebx
        shlq $48,%r12
        xorq %r8,%r13
        movq %r9,%r10
        xorq %r12,%r9
        shrq $8,%r8
        movzbq %r13b,%r13
        shrq $8,%r9
        xorq -128(%rbp,%rcx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rcx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r13,2),%r13
        movzbl %dl,%ecx
        shlb $4,%al
        movzbq (%rsp,%rbx,1),%r12
        shrl $4,%ecx
        shlq $48,%r13
        xorq %r8,%r12
        movq %r9,%r10
        xorq %r13,%r9
        shrq $8,%r8
        movzbq %r12b,%r12
        shrq $8,%r9
        xorq -128(%rbp,%rbx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rbx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r12,2),%r12
        movzbl %dl,%ebx
        shlb $4,%al
        movzbq (%rsp,%rcx,1),%r13
        shrl $4,%ebx
        shlq $48,%r12
        xorq %r8,%r13
        movq %r9,%r10
        xorq %r12,%r9
        shrq $8,%r8
        movzbq %r13b,%r13
        shrq $8,%r9
        xorq -128(%rbp,%rcx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rcx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r13,2),%r13
        movzbl %dl,%ecx
        shlb $4,%al
        movzbq (%rsp,%rbx,1),%r12
        shrl $4,%ecx
        shlq $48,%r13
        xorq %r8,%r12
        movq %r9,%r10
        xorq %r13,%r9
        shrq $8,%r8
        movzbq %r12b,%r12
        movl 4(%rdi),%edx
        shrq $8,%r9
        xorq -128(%rbp,%rbx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rbx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r12,2),%r12
        movzbl %dl,%ebx
        shlb $4,%al
        movzbq (%rsp,%rcx,1),%r13
        shrl $4,%ebx
        shlq $48,%r12
        xorq %r8,%r13
        movq %r9,%r10
        xorq %r12,%r9
        shrq $8,%r8
        movzbq %r13b,%r13
        shrq $8,%r9
        xorq -128(%rbp,%rcx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rcx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r13,2),%r13
        movzbl %dl,%ecx
        shlb $4,%al
        movzbq (%rsp,%rbx,1),%r12
        shrl $4,%ecx
        shlq $48,%r13
        xorq %r8,%r12
        movq %r9,%r10
        xorq %r13,%r9
        shrq $8,%r8
        movzbq %r12b,%r12
        shrq $8,%r9
        xorq -128(%rbp,%rbx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rbx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r12,2),%r12
        movzbl %dl,%ebx
        shlb $4,%al
        movzbq (%rsp,%rcx,1),%r13
        shrl $4,%ebx
        shlq $48,%r12
        xorq %r8,%r13
        movq %r9,%r10
        xorq %r12,%r9
        shrq $8,%r8
        movzbq %r13b,%r13
        shrq $8,%r9
        xorq -128(%rbp,%rcx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rcx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r13,2),%r13
        movzbl %dl,%ecx
        shlb $4,%al
        movzbq (%rsp,%rbx,1),%r12
        shrl $4,%ecx
        shlq $48,%r13
        xorq %r8,%r12
        movq %r9,%r10
        xorq %r13,%r9
        shrq $8,%r8
        movzbq %r12b,%r12
        movl 0(%rdi),%edx
        shrq $8,%r9
        xorq -128(%rbp,%rbx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rbx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r12,2),%r12
        movzbl %dl,%ebx
        shlb $4,%al
        movzbq (%rsp,%rcx,1),%r13
        shrl $4,%ebx
        shlq $48,%r12
        xorq %r8,%r13
        movq %r9,%r10
        xorq %r12,%r9
        shrq $8,%r8
        movzbq %r13b,%r13
        shrq $8,%r9
        xorq -128(%rbp,%rcx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rcx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r13,2),%r13
        movzbl %dl,%ecx
        shlb $4,%al
        movzbq (%rsp,%rbx,1),%r12
        shrl $4,%ecx
        shlq $48,%r13
        xorq %r8,%r12
        movq %r9,%r10
        xorq %r13,%r9
        shrq $8,%r8
        movzbq %r12b,%r12
        shrq $8,%r9
        xorq -128(%rbp,%rbx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rbx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r12,2),%r12
        movzbl %dl,%ebx
        shlb $4,%al
        movzbq (%rsp,%rcx,1),%r13
        shrl $4,%ebx
        shlq $48,%r12
        xorq %r8,%r13
        movq %r9,%r10
        xorq %r12,%r9
        shrq $8,%r8
        movzbq %r13b,%r13
        shrq $8,%r9
        xorq -128(%rbp,%rcx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rcx,8),%r9
        roll $8,%edx
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        movb %dl,%al
        xorq %r10,%r8
        movzwq (%r11,%r13,2),%r13
        movzbl %dl,%ecx
        shlb $4,%al
        movzbq (%rsp,%rbx,1),%r12
        andl $240,%ecx
        shlq $48,%r13
        xorq %r8,%r12
        movq %r9,%r10
        xorq %r13,%r9
        shrq $8,%r8
        movzbq %r12b,%r12
        movl -4(%rdi),%edx
        shrq $8,%r9
        xorq -128(%rbp,%rbx,8),%r8
        shlq $56,%r10
        xorq (%rbp,%rbx,8),%r9
        movzwq (%r11,%r12,2),%r12
        xorq 8(%rsi,%rax,1),%r8
        xorq (%rsi,%rax,1),%r9
        shlq $48,%r12
        xorq %r10,%r8
        xorq %r12,%r9
        movzbq %r8b,%r13
        shrq $4,%r8
        movq %r9,%r10
        shlb $4,%r13b
        shrq $4,%r9
        xorq 8(%rsi,%rcx,1),%r8
        movzwq (%r11,%r13,2),%r13
        shlq $60,%r10
        xorq (%rsi,%rcx,1),%r9
        xorq %r10,%r8
        shlq $48,%r13
        bswapq %r8
        xorq %r13,%r9
        bswapq %r9
        cmpq %r15,%r14
        jb .Louter_loop
        movq %r8,8(%rdi)
        movq %r9,(%rdi)

        leaq 280(%rsp),%rsi
        movq 0(%rsi),%r15
        movq 8(%rsi),%r14
        movq 16(%rsi),%r13
        movq 24(%rsi),%r12
        movq 32(%rsi),%rbp
        movq 40(%rsi),%rbx
        leaq 48(%rsi),%rsp
.Lghash_epilogue:
        .byte 0xf3,0xc3
.size gcm_ghash_4bit,.-gcm_ghash_4bit
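# void gcm_init_clmul(u128 Htable[], const u64 H[2]) -- %rdi = output
# table, %rsi = raw hash key. pshufd $78 swaps the two quadwords of H;
# the key is then multiplied by x (a one-bit left shift with conditional
# reduction) and squared with PCLMULQDQ. This version appears to store
# just two entries: H at (%rdi) and H^2 at 16(%rdi), for gcm_ghash_clmul.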
.globl gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
        movdqu (%rsi),%xmm2
        pshufd $78,%xmm2,%xmm2
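# Multiply H by x: shift left one bit across both quadwords (psllq/
# psrlq/pslldq/por). The pshufd $255 / pcmpgtd pair broadcasts H's top
# dword and turns its sign bit into an all-ones dword mask, so the
# reduction constant .L0x1c2_polynomial is XORed in only when a bit was
# shifted out at the top.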
        pshufd $255,%xmm2,%xmm4
        movdqa %xmm2,%xmm3
        psllq $1,%xmm2
        pxor %xmm5,%xmm5
        psrlq $63,%xmm3
        pcmpgtd %xmm4,%xmm5
        pslldq $8,%xmm3
        por %xmm3,%xmm2

        pand .L0x1c2_polynomial(%rip),%xmm5
        pxor %xmm5,%xmm2
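# Square H with a Karatsuba carry-less multiply. The raw .byte sequences
# are SSE opcodes spelled out for assemblers that predate PCLMULQDQ:
#     102,15,58,68,194,0  = pclmulqdq $0x00,%xmm2,%xmm0  (low halves)
#     102,15,58,68,202,17 = pclmulqdq $0x11,%xmm2,%xmm1  (high halves)
#     102,15,58,68,220,0  = pclmulqdq $0x00,%xmm4,%xmm3  (middle term)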
        movdqa %xmm2,%xmm0
        movdqa %xmm0,%xmm1
        pshufd $78,%xmm0,%xmm3
        pshufd $78,%xmm2,%xmm4
        pxor %xmm0,%xmm3
        pxor %xmm2,%xmm4
        .byte 102,15,58,68,194,0
        .byte 102,15,58,68,202,17
        .byte 102,15,58,68,220,0
        pxor %xmm0,%xmm3
        pxor %xmm1,%xmm3

        movdqa %xmm3,%xmm4
        psrldq $8,%xmm3
        pslldq $8,%xmm4
        pxor %xmm3,%xmm1
        pxor %xmm4,%xmm0
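# First phase of the reduction modulo x^128 + x^7 + x^2 + x + 1: the
# psllq $1/$5/$57 ladder in effect multiplies the low product half by
# x^63 + x^62 + x^57 and folds the result up into the high half.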
        movdqa %xmm0,%xmm3
        psllq $1,%xmm0
        pxor %xmm3,%xmm0
        psllq $5,%xmm0
        pxor %xmm3,%xmm0
        psllq $57,%xmm0
        movdqa %xmm0,%xmm4
        pslldq $8,%xmm0
        psrldq $8,%xmm4
        pxor %xmm3,%xmm0
        pxor %xmm4,%xmm1
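# Second phase: fold back down with right shifts by 5, 1 and 1, leaving
# the reduced 128-bit remainder in %xmm0.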
        movdqa %xmm0,%xmm4
        psrlq $5,%xmm0
        pxor %xmm4,%xmm0
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
        pxor %xmm1,%xmm4
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
        movdqu %xmm2,(%rdi)
        movdqu %xmm0,16(%rdi)
        .byte 0xf3,0xc3
.size gcm_init_clmul,.-gcm_init_clmul
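# void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[]) -- one multiply of
# Xi by H via PCLMULQDQ. ".byte 102,15,56,0,197" is pshufb %xmm5,%xmm0,
# with %xmm5 holding .Lbswap_mask, so Xi is byte-reversed on load and on
# store; in between is the same Karatsuba multiply and two-phase
# reduction as in gcm_init_clmul.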
.globl gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
        movdqu (%rdi),%xmm0
        movdqa .Lbswap_mask(%rip),%xmm5
        movdqu (%rsi),%xmm2
        .byte 102,15,56,0,197
        movdqa %xmm0,%xmm1
        pshufd $78,%xmm0,%xmm3
        pshufd $78,%xmm2,%xmm4
        pxor %xmm0,%xmm3
        pxor %xmm2,%xmm4
        .byte 102,15,58,68,194,0
        .byte 102,15,58,68,202,17
        .byte 102,15,58,68,220,0
        pxor %xmm0,%xmm3
        pxor %xmm1,%xmm3

        movdqa %xmm3,%xmm4
        psrldq $8,%xmm3
        pslldq $8,%xmm4
        pxor %xmm3,%xmm1
        pxor %xmm4,%xmm0

        movdqa %xmm0,%xmm3
        psllq $1,%xmm0
        pxor %xmm3,%xmm0
        psllq $5,%xmm0
        pxor %xmm3,%xmm0
        psllq $57,%xmm0
        movdqa %xmm0,%xmm4
        pslldq $8,%xmm0
        psrldq $8,%xmm4
        pxor %xmm3,%xmm0
        pxor %xmm4,%xmm1

        movdqa %xmm0,%xmm4
        psrlq $5,%xmm0
        pxor %xmm4,%xmm0
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
        pxor %xmm1,%xmm4
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
        .byte 102,15,56,0,197
        movdqu %xmm0,(%rdi)
        .byte 0xf3,0xc3
.size gcm_gmult_clmul,.-gcm_gmult_clmul
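# void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[], const u8 *inp,
# size_t len) -- processes the input two blocks per iteration: the first
# block of a pair (XORed into Xi) is multiplied by H^2, kept in %xmm8,
# and the second by H, so each pair costs only one reduction. The
# REX-prefixed raw opcodes decode accordingly, e.g.
#     102,65,15,58,68,192,0 = pclmulqdq $0x00,%xmm8,%xmm0
#     102,69,15,58,68,202,0 = pclmulqdq $0x00,%xmm10,%xmm9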
.globl gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
        movdqa .Lbswap_mask(%rip),%xmm5

        movdqu (%rdi),%xmm0
        movdqu (%rsi),%xmm2
        .byte 102,15,56,0,197

        subq $16,%rcx
        jz .Lodd_tail

        movdqu 16(%rsi),%xmm8

        movdqu (%rdx),%xmm3
        movdqu 16(%rdx),%xmm6
        .byte 102,15,56,0,221
        .byte 102,15,56,0,245
        pxor %xmm3,%xmm0
        movdqa %xmm6,%xmm7
        pshufd $78,%xmm6,%xmm3
        pshufd $78,%xmm2,%xmm4
        pxor %xmm6,%xmm3
        pxor %xmm2,%xmm4
        .byte 102,15,58,68,242,0
        .byte 102,15,58,68,250,17
        .byte 102,15,58,68,220,0
        pxor %xmm6,%xmm3
        pxor %xmm7,%xmm3

        movdqa %xmm3,%xmm4
        psrldq $8,%xmm3
        pslldq $8,%xmm4
        pxor %xmm3,%xmm7
        pxor %xmm4,%xmm6
        movdqa %xmm0,%xmm1
        pshufd $78,%xmm0,%xmm3
        pshufd $78,%xmm8,%xmm4
        pxor %xmm0,%xmm3
        pxor %xmm8,%xmm4

        leaq 32(%rdx),%rdx
        subq $32,%rcx
        jbe .Leven_tail

.Lmod_loop:
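# Loop body: the reduction of the previous pair's product is interleaved
# with the carry-less multiplies for the next pair, apparently to hide
# PCLMULQDQ latency.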
        .byte 102,65,15,58,68,192,0
        .byte 102,65,15,58,68,200,17
        .byte 102,15,58,68,220,0
        pxor %xmm0,%xmm3
        pxor %xmm1,%xmm3

        movdqa %xmm3,%xmm4
        psrldq $8,%xmm3
        pslldq $8,%xmm4
        pxor %xmm3,%xmm1
        pxor %xmm4,%xmm0
        movdqu (%rdx),%xmm3
        pxor %xmm6,%xmm0
        pxor %xmm7,%xmm1

        movdqu 16(%rdx),%xmm6
        .byte 102,15,56,0,221
        .byte 102,15,56,0,245

        movdqa %xmm6,%xmm7
        pshufd $78,%xmm6,%xmm9
        pshufd $78,%xmm2,%xmm10
        pxor %xmm6,%xmm9
        pxor %xmm2,%xmm10
        pxor %xmm3,%xmm1

        movdqa %xmm0,%xmm3
        psllq $1,%xmm0
        pxor %xmm3,%xmm0
        psllq $5,%xmm0
        pxor %xmm3,%xmm0
        .byte 102,15,58,68,242,0
        psllq $57,%xmm0
        movdqa %xmm0,%xmm4
        pslldq $8,%xmm0
        psrldq $8,%xmm4
        pxor %xmm3,%xmm0
        pxor %xmm4,%xmm1

        .byte 102,15,58,68,250,17
        movdqa %xmm0,%xmm4
        psrlq $5,%xmm0
        pxor %xmm4,%xmm0
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
        pxor %xmm1,%xmm4
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0

        .byte 102,69,15,58,68,202,0
        movdqa %xmm0,%xmm1
        pshufd $78,%xmm0,%xmm3
        pshufd $78,%xmm8,%xmm4
        pxor %xmm0,%xmm3
        pxor %xmm8,%xmm4

        pxor %xmm6,%xmm9
        pxor %xmm7,%xmm9
        movdqa %xmm9,%xmm10
        psrldq $8,%xmm9
        pslldq $8,%xmm10
        pxor %xmm9,%xmm7
        pxor %xmm10,%xmm6

        leaq 32(%rdx),%rdx
        subq $32,%rcx
        ja .Lmod_loop
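# .Leven_tail finishes the final pair of blocks when the length was an
# exact multiple of 32; .Lodd_tail multiplies one last single block by H.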
.Leven_tail:
        .byte 102,65,15,58,68,192,0
        .byte 102,65,15,58,68,200,17
        .byte 102,15,58,68,220,0
        pxor %xmm0,%xmm3
        pxor %xmm1,%xmm3

        movdqa %xmm3,%xmm4
        psrldq $8,%xmm3
        pslldq $8,%xmm4
        pxor %xmm3,%xmm1
        pxor %xmm4,%xmm0
        pxor %xmm6,%xmm0
        pxor %xmm7,%xmm1

        movdqa %xmm0,%xmm3
        psllq $1,%xmm0
        pxor %xmm3,%xmm0
        psllq $5,%xmm0
        pxor %xmm3,%xmm0
        psllq $57,%xmm0
        movdqa %xmm0,%xmm4
        pslldq $8,%xmm0
        psrldq $8,%xmm4
        pxor %xmm3,%xmm0
        pxor %xmm4,%xmm1

        movdqa %xmm0,%xmm4
        psrlq $5,%xmm0
        pxor %xmm4,%xmm0
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
        pxor %xmm1,%xmm4
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
        testq %rcx,%rcx
        jnz .Ldone

.Lodd_tail:
        movdqu (%rdx),%xmm3
        .byte 102,15,56,0,221
        pxor %xmm3,%xmm0
        movdqa %xmm0,%xmm1
        pshufd $78,%xmm0,%xmm3
        pshufd $78,%xmm2,%xmm4
        pxor %xmm0,%xmm3
        pxor %xmm2,%xmm4
        .byte 102,15,58,68,194,0
        .byte 102,15,58,68,202,17
        .byte 102,15,58,68,220,0
        pxor %xmm0,%xmm3
        pxor %xmm1,%xmm3

        movdqa %xmm3,%xmm4
        psrldq $8,%xmm3
        pslldq $8,%xmm4
        pxor %xmm3,%xmm1
        pxor %xmm4,%xmm0

        movdqa %xmm0,%xmm3
        psllq $1,%xmm0
        pxor %xmm3,%xmm0
        psllq $5,%xmm0
        pxor %xmm3,%xmm0
        psllq $57,%xmm0
        movdqa %xmm0,%xmm4
        pslldq $8,%xmm0
        psrldq $8,%xmm4
        pxor %xmm3,%xmm0
        pxor %xmm4,%xmm1

        movdqa %xmm0,%xmm4
        psrlq $5,%xmm0
        pxor %xmm4,%xmm0
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
        pxor %xmm1,%xmm4
        psrlq $1,%xmm0
        pxor %xmm4,%xmm0
.Ldone:
        .byte 102,15,56,0,197
        movdqu %xmm0,(%rdi)
        .byte 0xf3,0xc3
.LSEH_end_gcm_ghash_clmul:
.size gcm_ghash_clmul,.-gcm_ghash_clmul
.align 64
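# Constant pool: .Lbswap_mask is the pshufb byte-reversal mask;
# .L0x1c2_polynomial holds the GCM reduction constant (0xc2 in its top
# byte); .Lrem_4bit and .Lrem_8bit are the remainder tables used by the
# table-driven functions above.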
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.align 64
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
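# The .byte string below spells "GHASH for x86_64, CRYPTOGAMS by
# <appro@openssl.org>".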
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64

.section .note.GNU-stack,"",%progbits