libsodium: Needed for dnscrypt-proxy Release 1.3.0
[tomato.git] / release / src / router / libsodium / src / libsodium / crypto_stream / salsa20 / amd64_xmm6 / stream_salsa20_amd64_xmm6.s
blob9fb04066ad1772fd14a1cd5f09ccd1d3540e894f
2 #if defined(__amd64) || defined(__amd64__) || defined(__x86_64__)
# crypto_stream_salsa20 - fill a caller buffer with keystream.
# Syntax: GNU as, AT&T operand order (source first, destination last).
#
# Registers read on entry:
#   rdi  destination buffer; rsi bytes are written
#   rsi  number of bytes to produce
#   rdx  pointer to 8 bytes consumed at ._start (presumably the nonce;
#        confirm against the C prototype)
#   rcx  pointer to 32 bytes consumed at ._start (presumably the key;
#        confirm against the C prototype)
#
# Strategy: zero the destination, then enter the shared body of
# crypto_stream_salsa20_xor at ._start with source == destination, so the
# XOR against zero bytes leaves the raw keystream in the buffer.
#
# Stack frame (offsets from the realigned rsp):
#   0..63     four 16-byte state rows
#   64..287   per-word broadcast vectors for the 4-block path
#   288, 304  block-counter vectors (low and high words)
#   320, 336  spill slots used by ._mainloop1
#   352       frame size (r11), needed to undo the rsp adjustment
#   360..400  saved r12, r13, r14, r15, rbx, rbp
#   408       remaining byte count
#   416..479  64-byte bounce buffer for a final partial block
4 .text
5 .p2align 5
# Both the plain symbol and an underscore-prefixed alias are exported.
7 .globl _crypto_stream_salsa20
8 .globl crypto_stream_salsa20
9 _crypto_stream_salsa20:
10 crypto_stream_salsa20:
# r11 = (rsp & 31) + 480; subtracting it leaves rsp 32-byte aligned with at
# least 480 bytes of frame, which keeps every movdqa slot 16-byte aligned.
11 mov %rsp,%r11
12 and $31,%r11
13 add $480,%r11
14 sub %r11,%rsp
15 movq %r11,352(%rsp)
# These registers are saved and later restored at ._done; none of them is
# modified by the code visible in this file.
16 movq %r12,360(%rsp)
17 movq %r13,368(%rsp)
18 movq %r14,376(%rsp)
19 movq %r15,384(%rsp)
20 movq %rbx,392(%rsp)
21 movq %rbp,400(%rsp)
# Move arguments into the registers the shared body expects:
#   r9 = byte count, rdi = destination, rsi = source (same buffer),
#   rdx = 8-byte input pointer, r10 = 32-byte input pointer.
# The self-moves of rdi and rdx have no effect.
22 mov %rsi,%r9
23 mov %rdi,%rdi
24 mov %rdi,%rsi
25 mov %rdx,%rdx
26 mov %rcx,%r10
# Unsigned below-or-equal against zero is taken only when r9 == 0.
27 cmp $0,%r9
28 jbe ._done
# Zero-fill the destination: rep stosb stores al (0) rcx times and advances
# rdi, so rdi is rewound by the byte count afterwards.
30 mov $0,%rax
31 mov %r9,%rcx
32 rep stosb
33 sub %r9,%rdi
34 jmp ._start
# crypto_stream_salsa20_xor - XOR a buffer with keystream.
#
# Registers read on entry:
#   rdi  destination buffer
#   rsi  source buffer
#   rdx  number of bytes to process
#   rcx  pointer to 8 bytes consumed at ._start (presumably the nonce;
#        confirm against the C prototype)
#   r8   pointer to 32 bytes consumed at ._start (presumably the key;
#        confirm against the C prototype)
#
# The prologue mirrors crypto_stream_salsa20: same frame size, same
# alignment, same save slots, so both entry points can share ._start and
# ._done.
36 .text
37 .p2align 5
39 .globl _crypto_stream_salsa20_xor
40 .globl crypto_stream_salsa20_xor
41 _crypto_stream_salsa20_xor:
42 crypto_stream_salsa20_xor:
# r11 = (rsp & 31) + 480; rsp -= r11 gives a 32-byte aligned frame.
43 mov %rsp,%r11
44 and $31,%r11
45 add $480,%r11
46 sub %r11,%rsp
# Frame size goes to slot 352 so ._done can restore rsp.
47 movq %r11,352(%rsp)
48 movq %r12,360(%rsp)
49 movq %r13,368(%rsp)
50 movq %r14,376(%rsp)
51 movq %r15,384(%rsp)
52 movq %rbx,392(%rsp)
53 movq %rbp,400(%rsp)
# Shuffle arguments into the layout used by the shared body:
#   rdi = destination, rsi = source (both unchanged; the self-moves have no
#   effect), r9 = byte count, rdx = 8-byte input pointer,
#   r10 = 32-byte input pointer.
54 mov %rdi,%rdi
55 mov %rsi,%rsi
56 mov %rdx,%r9
57 mov %rcx,%rdx
58 mov %r8,%r10
# Nothing to do for a zero length (jbe after cmp with 0 means r9 == 0).
59 cmp $0,%r9
60 jbe ._done
# ._start - build the 16-word cipher state on the stack.
#
# Inputs: r10 -> 32 bytes (eight words, called k0..k7 below by byte offset
# divided by 4), rdx -> 8 bytes (two words n0, n1), r9 = byte count.
#
# The state is kept as four 16-byte rows. Using the word index that the
# store code later maps to byte offset 4*index of each output block, the
# rows are laid out as:
#   0(%rsp)  = word12, word1,  word6,  word11  = k5, k0, n0, k4
#   16(%rsp) = word8,  word13, word2,  word7   = 0,  k6, k1, n1
#   32(%rsp) = word4,  word9,  word14, word3   = k3, 0,  k7, k2
#   48(%rsp) = word0,  word5,  word10, word15  = the four constants
# word8 (16(%rsp)) and word9 (4+32(%rsp)) start at zero and are used as the
# low and high halves of the 64-bit block counter.
62 ._start:
63 movl 20(%r10),%ecx
64 movl 0(%r10),%r8d
65 movl 0(%rdx),%eax
66 movl 16(%r10),%r11d
67 movl %ecx,0(%rsp)
68 movl %r8d,4+0(%rsp)
69 movl %eax,8+0(%rsp)
70 movl %r11d,12+0(%rsp)
# rcx = 0 becomes the initial low counter word.
71 mov $0,%rcx
72 movl 24(%r10),%r8d
73 movl 4(%r10),%eax
# Last use of rdx as the 8-byte input pointer; it is scratch from here on.
74 movl 4(%rdx),%edx
75 movl %ecx,16(%rsp)
76 movl %r8d,4+16(%rsp)
77 movl %eax,8+16(%rsp)
78 movl %edx,12+16(%rsp)
79 movl 12(%r10),%edx
# rcx = 0 becomes the initial high counter word.
80 mov $0,%rcx
81 movl 28(%r10),%r8d
82 movl 8(%r10),%eax
83 movl %edx,32(%rsp)
84 movl %ecx,4+32(%rsp)
85 movl %r8d,8+32(%rsp)
86 movl %eax,12+32(%rsp)
# Constants 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574: stored
# little-endian they spell the ASCII text "expand 32-byte k".
87 mov $1634760805,%rdx
88 mov $857760878,%rcx
89 mov $2036477234,%r8
90 mov $1797285236,%rax
91 movl %edx,48(%rsp)
92 movl %ecx,4+48(%rsp)
93 movl %r8d,8+48(%rsp)
94 movl %eax,12+48(%rsp)
# Fewer than 256 bytes: skip the 4-block setup and use the 1-block path.
95 cmp $256,%r9
96 jb ._bytesbetween1and255
# Setup for the 4-block path: broadcast each non-counter state word to all
# four lanes of its own vector (pshufd 0x00/0x55/0xaa/0xff replicate lane
# 0/1/2/3) and park the vectors in the frame.
# Row 48(%rsp): word5 -> 64, word10 -> 80, word15 -> 96, word0 -> 112.
98 movdqa 48(%rsp),%xmm0
99 pshufd $0x55,%xmm0,%xmm1
100 pshufd $0xaa,%xmm0,%xmm2
101 pshufd $0xff,%xmm0,%xmm3
102 pshufd $0x00,%xmm0,%xmm0
103 movdqa %xmm1,64(%rsp)
104 movdqa %xmm2,80(%rsp)
105 movdqa %xmm3,96(%rsp)
106 movdqa %xmm0,112(%rsp)
# Row 0(%rsp): word6 -> 128, word11 -> 144, word12 -> 160, word1 -> 176.
107 movdqa 0(%rsp),%xmm0
108 pshufd $0xaa,%xmm0,%xmm1
109 pshufd $0xff,%xmm0,%xmm2
110 pshufd $0x00,%xmm0,%xmm3
111 pshufd $0x55,%xmm0,%xmm0
112 movdqa %xmm1,128(%rsp)
113 movdqa %xmm2,144(%rsp)
114 movdqa %xmm3,160(%rsp)
115 movdqa %xmm0,176(%rsp)
# Row 16(%rsp): word7 -> 192, word13 -> 208, word2 -> 224. Lane 0 (counter
# low word) is not broadcast; it differs per block.
116 movdqa 16(%rsp),%xmm0
117 pshufd $0xff,%xmm0,%xmm1
118 pshufd $0x55,%xmm0,%xmm2
119 pshufd $0xaa,%xmm0,%xmm0
120 movdqa %xmm1,192(%rsp)
121 movdqa %xmm2,208(%rsp)
122 movdqa %xmm0,224(%rsp)
# Row 32(%rsp): word4 -> 240, word14 -> 256, word3 -> 272. Lane 1 (counter
# high word) is not broadcast; it differs per block.
123 movdqa 32(%rsp),%xmm0
124 pshufd $0x00,%xmm0,%xmm1
125 pshufd $0xaa,%xmm0,%xmm2
126 pshufd $0xff,%xmm0,%xmm0
127 movdqa %xmm1,240(%rsp)
128 movdqa %xmm2,256(%rsp)
129 movdqa %xmm0,272(%rsp)
# ._bytesatleast256 - prepare one pass of the 4-block path.
#
# Entered with r9 >= 256. Reads the 64-bit block counter (low word at
# 16(%rsp), high word at 4+32(%rsp)), writes the counters of the next four
# blocks lane by lane into 288(%rsp) (low words) and 304(%rsp) (high words),
# and stores counter + 4 back. Then saves r9 in slot 408, sets the round
# count in rdx and loads all sixteen state vectors for ._mainloop1.
#
# Review fix: the low and high words are merged into a full 64-bit value in
# rdx exactly once. The previous code repeated shl/add on every step, which
# added the high word to rdx again each time; that is harmless while the
# high word is zero but yields wrong counters for lanes 2..4 once the
# counter reaches 2^32. Results are unchanged for smaller counters.
131 ._bytesatleast256:
# movl zero-extends, so rdx = low word and rcx = high word.
132 movl 16(%rsp),%edx
133 movl 4+32(%rsp),%ecx
# Lane 0: the current counter.
134 movl %edx,288(%rsp)
135 movl %ecx,304(%rsp)
# rdx = ((high << 32) | low) + 1, then split again for lane 1.
136 add $1,%rdx
137 shl $32,%rcx
138 add %rcx,%rdx
139 mov %rdx,%rcx
140 shr $32,%rcx
141 movl %edx,4+288(%rsp)
142 movl %ecx,4+304(%rsp)
# Lane 2: rdx already holds the full 64-bit value, so only increment.
143 add $1,%rdx
146 mov %rdx,%rcx
147 shr $32,%rcx
148 movl %edx,8+288(%rsp)
149 movl %ecx,8+304(%rsp)
# Lane 3.
150 add $1,%rdx
153 mov %rdx,%rcx
154 shr $32,%rcx
155 movl %edx,12+288(%rsp)
156 movl %ecx,12+304(%rsp)
# Counter for the block after these four goes back into the state rows.
157 add $1,%rdx
160 mov %rdx,%rcx
161 shr $32,%rcx
162 movl %edx,16(%rsp)
163 movl %ecx,4+32(%rsp)
# r9 is reused as scratch by the store code, so park the byte count.
164 movq %r9,408(%rsp)
# rdx = number of rounds; ._mainloop1 consumes two per iteration.
165 mov $20,%rdx
# Load the working copy of the state. Register to word-index assignment:
#   xmm0 = word5   xmm1 = word10  xmm2 = word15  xmm3 = word14
#   xmm4 = word3   xmm5 = word6   xmm6 = word11  xmm7 = word1
#   xmm8 = word7   xmm9 = word13  xmm10 = word2  xmm11 = word9
#   xmm12 = word0  xmm13 = word12 xmm14 = word4  xmm15 = word8
166 movdqa 64(%rsp),%xmm0
167 movdqa 80(%rsp),%xmm1
168 movdqa 96(%rsp),%xmm2
169 movdqa 256(%rsp),%xmm3
170 movdqa 272(%rsp),%xmm4
171 movdqa 128(%rsp),%xmm5
172 movdqa 144(%rsp),%xmm6
173 movdqa 176(%rsp),%xmm7
174 movdqa 192(%rsp),%xmm8
175 movdqa 208(%rsp),%xmm9
176 movdqa 224(%rsp),%xmm10
177 movdqa 304(%rsp),%xmm11
178 movdqa 112(%rsp),%xmm12
179 movdqa 160(%rsp),%xmm13
180 movdqa 240(%rsp),%xmm14
181 movdqa 288(%rsp),%xmm15
# ._mainloop1 - two cipher rounds on four blocks at once.
#
# Each xmm register holds one state word for four consecutive blocks (one
# block per 32-bit lane). Below, xN means the vector for state word N, where
# N is the word index used by the store code after the loop. On entry:
#   x0 = xmm12  x1 = xmm7   x2 = xmm10  x3 = xmm4
#   x4 = xmm14  x5 = xmm0   x6 = xmm5   x7 = xmm8
#   x8 = xmm15  x9 = xmm11  x10 = xmm1  x11 = xmm6
#   x12 = xmm13 x13 = xmm9  x14 = xmm3  x15 = xmm2
# Sixteen state vectors plus temporaries do not fit in sixteen registers, so
# two words at a time live in spill slots 320(%rsp) and 336(%rsp).
#
# rotl(v, r) is written as pslld r / psrld 32-r / two pxor, so every update
# has the shape  xa ^= rotl(xb + xc, r)  with r in 7, 9, 13, 18.
# rdx counts remaining rounds; each pass performs two and loops while the
# result of the subtraction is above zero (10 passes for 20 rounds).
183 ._mainloop1:
# Spill x10 and x15; xmm1 and xmm2 become temporaries.
184 movdqa %xmm1,320(%rsp)
185 movdqa %xmm2,336(%rsp)
# x4 ^= rotl(x12 + x0, 7)
186 movdqa %xmm13,%xmm1
187 paddd %xmm12,%xmm1
188 movdqa %xmm1,%xmm2
189 pslld $7,%xmm1
190 pxor %xmm1,%xmm14
191 psrld $25,%xmm2
192 pxor %xmm2,%xmm14
# x9 ^= rotl(x1 + x5, 7)
193 movdqa %xmm7,%xmm1
194 paddd %xmm0,%xmm1
195 movdqa %xmm1,%xmm2
196 pslld $7,%xmm1
197 pxor %xmm1,%xmm11
198 psrld $25,%xmm2
199 pxor %xmm2,%xmm11
# x8 ^= rotl(x0 + x4, 9)
200 movdqa %xmm12,%xmm1
201 paddd %xmm14,%xmm1
202 movdqa %xmm1,%xmm2
203 pslld $9,%xmm1
204 pxor %xmm1,%xmm15
205 psrld $23,%xmm2
206 pxor %xmm2,%xmm15
# x13 ^= rotl(x5 + x9, 9)
207 movdqa %xmm0,%xmm1
208 paddd %xmm11,%xmm1
209 movdqa %xmm1,%xmm2
210 pslld $9,%xmm1
211 pxor %xmm1,%xmm9
212 psrld $23,%xmm2
213 pxor %xmm2,%xmm9
# x12 ^= rotl(x4 + x8, 13)
214 movdqa %xmm14,%xmm1
215 paddd %xmm15,%xmm1
216 movdqa %xmm1,%xmm2
217 pslld $13,%xmm1
218 pxor %xmm1,%xmm13
219 psrld $19,%xmm2
220 pxor %xmm2,%xmm13
# x1 ^= rotl(x9 + x13, 13)
221 movdqa %xmm11,%xmm1
222 paddd %xmm9,%xmm1
223 movdqa %xmm1,%xmm2
224 pslld $13,%xmm1
225 pxor %xmm1,%xmm7
226 psrld $19,%xmm2
227 pxor %xmm2,%xmm7
# x0 ^= rotl(x8 + x12, 18)
228 movdqa %xmm15,%xmm1
229 paddd %xmm13,%xmm1
230 movdqa %xmm1,%xmm2
231 pslld $18,%xmm1
232 pxor %xmm1,%xmm12
233 psrld $14,%xmm2
234 pxor %xmm2,%xmm12
# Swap through slot 320: reload x10 into xmm1, park x0; xmm12 is now a
# temporary.
235 movdqa 320(%rsp),%xmm1
236 movdqa %xmm12,320(%rsp)
# x5 ^= rotl(x13 + x1, 18)
237 movdqa %xmm9,%xmm2
238 paddd %xmm7,%xmm2
239 movdqa %xmm2,%xmm12
240 pslld $18,%xmm2
241 pxor %xmm2,%xmm0
242 psrld $14,%xmm12
243 pxor %xmm12,%xmm0
# x14 ^= rotl(x6 + x10, 7)
244 movdqa %xmm5,%xmm2
245 paddd %xmm1,%xmm2
246 movdqa %xmm2,%xmm12
247 pslld $7,%xmm2
248 pxor %xmm2,%xmm3
249 psrld $25,%xmm12
250 pxor %xmm12,%xmm3
# Swap through slot 336: reload x15 into xmm2, park x5; xmm0 is now a
# temporary.
251 movdqa 336(%rsp),%xmm2
252 movdqa %xmm0,336(%rsp)
# x3 ^= rotl(x11 + x15, 7)
253 movdqa %xmm6,%xmm0
254 paddd %xmm2,%xmm0
255 movdqa %xmm0,%xmm12
256 pslld $7,%xmm0
257 pxor %xmm0,%xmm4
258 psrld $25,%xmm12
259 pxor %xmm12,%xmm4
# x2 ^= rotl(x10 + x14, 9)
260 movdqa %xmm1,%xmm0
261 paddd %xmm3,%xmm0
262 movdqa %xmm0,%xmm12
263 pslld $9,%xmm0
264 pxor %xmm0,%xmm10
265 psrld $23,%xmm12
266 pxor %xmm12,%xmm10
# x7 ^= rotl(x15 + x3, 9)
267 movdqa %xmm2,%xmm0
268 paddd %xmm4,%xmm0
269 movdqa %xmm0,%xmm12
270 pslld $9,%xmm0
271 pxor %xmm0,%xmm8
272 psrld $23,%xmm12
273 pxor %xmm12,%xmm8
# x6 ^= rotl(x14 + x2, 13)
274 movdqa %xmm3,%xmm0
275 paddd %xmm10,%xmm0
276 movdqa %xmm0,%xmm12
277 pslld $13,%xmm0
278 pxor %xmm0,%xmm5
279 psrld $19,%xmm12
280 pxor %xmm12,%xmm5
# x11 ^= rotl(x3 + x7, 13)
281 movdqa %xmm4,%xmm0
282 paddd %xmm8,%xmm0
283 movdqa %xmm0,%xmm12
284 pslld $13,%xmm0
285 pxor %xmm0,%xmm6
286 psrld $19,%xmm12
287 pxor %xmm12,%xmm6
# x10 ^= rotl(x2 + x6, 18)
288 movdqa %xmm10,%xmm0
289 paddd %xmm5,%xmm0
290 movdqa %xmm0,%xmm12
291 pslld $18,%xmm0
292 pxor %xmm0,%xmm1
293 psrld $14,%xmm12
294 pxor %xmm12,%xmm1
# Swap through slot 320: reload x0 into xmm0, park x10; xmm1 is now a
# temporary.
295 movdqa 320(%rsp),%xmm0
296 movdqa %xmm1,320(%rsp)
# Second round starts here, interleaved with the last step of the first.
# x1 ^= rotl(x3 + x0, 7)
297 movdqa %xmm4,%xmm1
298 paddd %xmm0,%xmm1
299 movdqa %xmm1,%xmm12
300 pslld $7,%xmm1
301 pxor %xmm1,%xmm7
302 psrld $25,%xmm12
303 pxor %xmm12,%xmm7
# x15 ^= rotl(x7 + x11, 18)   (final step of the first round)
304 movdqa %xmm8,%xmm1
305 paddd %xmm6,%xmm1
306 movdqa %xmm1,%xmm12
307 pslld $18,%xmm1
308 pxor %xmm1,%xmm2
309 psrld $14,%xmm12
310 pxor %xmm12,%xmm2
# Swap through slot 336: reload x5 into xmm12, park x15; xmm2 is now a
# temporary.
311 movdqa 336(%rsp),%xmm12
312 movdqa %xmm2,336(%rsp)
# x6 ^= rotl(x4 + x5, 7)
313 movdqa %xmm14,%xmm1
314 paddd %xmm12,%xmm1
315 movdqa %xmm1,%xmm2
316 pslld $7,%xmm1
317 pxor %xmm1,%xmm5
318 psrld $25,%xmm2
319 pxor %xmm2,%xmm5
# x2 ^= rotl(x0 + x1, 9)
320 movdqa %xmm0,%xmm1
321 paddd %xmm7,%xmm1
322 movdqa %xmm1,%xmm2
323 pslld $9,%xmm1
324 pxor %xmm1,%xmm10
325 psrld $23,%xmm2
326 pxor %xmm2,%xmm10
# x7 ^= rotl(x5 + x6, 9)
327 movdqa %xmm12,%xmm1
328 paddd %xmm5,%xmm1
329 movdqa %xmm1,%xmm2
330 pslld $9,%xmm1
331 pxor %xmm1,%xmm8
332 psrld $23,%xmm2
333 pxor %xmm2,%xmm8
# x3 ^= rotl(x1 + x2, 13)
334 movdqa %xmm7,%xmm1
335 paddd %xmm10,%xmm1
336 movdqa %xmm1,%xmm2
337 pslld $13,%xmm1
338 pxor %xmm1,%xmm4
339 psrld $19,%xmm2
340 pxor %xmm2,%xmm4
# x4 ^= rotl(x6 + x7, 13)
341 movdqa %xmm5,%xmm1
342 paddd %xmm8,%xmm1
343 movdqa %xmm1,%xmm2
344 pslld $13,%xmm1
345 pxor %xmm1,%xmm14
346 psrld $19,%xmm2
347 pxor %xmm2,%xmm14
# x0 ^= rotl(x2 + x3, 18)
348 movdqa %xmm10,%xmm1
349 paddd %xmm4,%xmm1
350 movdqa %xmm1,%xmm2
351 pslld $18,%xmm1
352 pxor %xmm1,%xmm0
353 psrld $14,%xmm2
354 pxor %xmm2,%xmm0
# Swap through slot 320: reload x10 into xmm1, park x0; xmm0 is now a
# temporary.
355 movdqa 320(%rsp),%xmm1
356 movdqa %xmm0,320(%rsp)
# x5 ^= rotl(x7 + x4, 18)
357 movdqa %xmm8,%xmm0
358 paddd %xmm14,%xmm0
359 movdqa %xmm0,%xmm2
360 pslld $18,%xmm0
361 pxor %xmm0,%xmm12
362 psrld $14,%xmm2
363 pxor %xmm2,%xmm12
# x11 ^= rotl(x9 + x10, 7)
364 movdqa %xmm11,%xmm0
365 paddd %xmm1,%xmm0
366 movdqa %xmm0,%xmm2
367 pslld $7,%xmm0
368 pxor %xmm0,%xmm6
369 psrld $25,%xmm2
370 pxor %xmm2,%xmm6
# Swap through slot 336: reload x15 into xmm2, park x5; xmm12 is now a
# temporary.
371 movdqa 336(%rsp),%xmm2
372 movdqa %xmm12,336(%rsp)
# x12 ^= rotl(x14 + x15, 7)
373 movdqa %xmm3,%xmm0
374 paddd %xmm2,%xmm0
375 movdqa %xmm0,%xmm12
376 pslld $7,%xmm0
377 pxor %xmm0,%xmm13
378 psrld $25,%xmm12
379 pxor %xmm12,%xmm13
# x8 ^= rotl(x10 + x11, 9)
380 movdqa %xmm1,%xmm0
381 paddd %xmm6,%xmm0
382 movdqa %xmm0,%xmm12
383 pslld $9,%xmm0
384 pxor %xmm0,%xmm15
385 psrld $23,%xmm12
386 pxor %xmm12,%xmm15
# x13 ^= rotl(x15 + x12, 9)
387 movdqa %xmm2,%xmm0
388 paddd %xmm13,%xmm0
389 movdqa %xmm0,%xmm12
390 pslld $9,%xmm0
391 pxor %xmm0,%xmm9
392 psrld $23,%xmm12
393 pxor %xmm12,%xmm9
# x9 ^= rotl(x11 + x8, 13)
394 movdqa %xmm6,%xmm0
395 paddd %xmm15,%xmm0
396 movdqa %xmm0,%xmm12
397 pslld $13,%xmm0
398 pxor %xmm0,%xmm11
399 psrld $19,%xmm12
400 pxor %xmm12,%xmm11
# x14 ^= rotl(x12 + x13, 13)
401 movdqa %xmm13,%xmm0
402 paddd %xmm9,%xmm0
403 movdqa %xmm0,%xmm12
404 pslld $13,%xmm0
405 pxor %xmm0,%xmm3
406 psrld $19,%xmm12
407 pxor %xmm12,%xmm3
# x10 ^= rotl(x8 + x9, 18)
408 movdqa %xmm15,%xmm0
409 paddd %xmm11,%xmm0
410 movdqa %xmm0,%xmm12
411 pslld $18,%xmm0
412 pxor %xmm0,%xmm1
413 psrld $14,%xmm12
414 pxor %xmm12,%xmm1
# x15 ^= rotl(x13 + x14, 18)
415 movdqa %xmm9,%xmm0
416 paddd %xmm3,%xmm0
417 movdqa %xmm0,%xmm12
418 pslld $18,%xmm0
419 pxor %xmm0,%xmm2
420 psrld $14,%xmm12
421 pxor %xmm12,%xmm2
# Bring x0 and x5 back from the spill slots so the register assignment
# matches the loop entry again.
422 movdqa 320(%rsp),%xmm12
423 movdqa 336(%rsp),%xmm0
# Two rounds done; continue while rounds remain.
424 sub $2,%rdx
425 ja ._mainloop1
# Finish the 4-block pass: add the original state words back in, XOR the
# result with 256 source bytes at rsi and write 256 bytes at rdi.
#
# Each xmm register holds one state word for four blocks, one block per
# lane. For every group of four words the code extracts lane 0 with movd
# (only the low 32 bits of the destination are used), XORs and stores it for
# block 0, then rotates the lanes with pshufd 0x39 (lane i+1 moves to lane
# i) and repeats for the blocks at +64, +128 and +192.
# rdx, rcx, r8 and r9 are scratch here; the byte count was parked in slot
# 408 and is reloaded at the end.
#
# Words 0..3 (xmm12, xmm7, xmm10, xmm4) -> byte offsets 0..15 of each block.
427 paddd 112(%rsp),%xmm12
428 paddd 176(%rsp),%xmm7
429 paddd 224(%rsp),%xmm10
430 paddd 272(%rsp),%xmm4
431 movd %xmm12,%rdx
432 movd %xmm7,%rcx
433 movd %xmm10,%r8
434 movd %xmm4,%r9
435 pshufd $0x39,%xmm12,%xmm12
436 pshufd $0x39,%xmm7,%xmm7
437 pshufd $0x39,%xmm10,%xmm10
438 pshufd $0x39,%xmm4,%xmm4
439 xorl 0(%rsi),%edx
440 xorl 4(%rsi),%ecx
441 xorl 8(%rsi),%r8d
442 xorl 12(%rsi),%r9d
443 movl %edx,0(%rdi)
444 movl %ecx,4(%rdi)
445 movl %r8d,8(%rdi)
446 movl %r9d,12(%rdi)
# Same words, block at +64.
447 movd %xmm12,%rdx
448 movd %xmm7,%rcx
449 movd %xmm10,%r8
450 movd %xmm4,%r9
451 pshufd $0x39,%xmm12,%xmm12
452 pshufd $0x39,%xmm7,%xmm7
453 pshufd $0x39,%xmm10,%xmm10
454 pshufd $0x39,%xmm4,%xmm4
455 xorl 64(%rsi),%edx
456 xorl 68(%rsi),%ecx
457 xorl 72(%rsi),%r8d
458 xorl 76(%rsi),%r9d
459 movl %edx,64(%rdi)
460 movl %ecx,68(%rdi)
461 movl %r8d,72(%rdi)
462 movl %r9d,76(%rdi)
# Same words, block at +128.
463 movd %xmm12,%rdx
464 movd %xmm7,%rcx
465 movd %xmm10,%r8
466 movd %xmm4,%r9
467 pshufd $0x39,%xmm12,%xmm12
468 pshufd $0x39,%xmm7,%xmm7
469 pshufd $0x39,%xmm10,%xmm10
470 pshufd $0x39,%xmm4,%xmm4
471 xorl 128(%rsi),%edx
472 xorl 132(%rsi),%ecx
473 xorl 136(%rsi),%r8d
474 xorl 140(%rsi),%r9d
475 movl %edx,128(%rdi)
476 movl %ecx,132(%rdi)
477 movl %r8d,136(%rdi)
478 movl %r9d,140(%rdi)
# Same words, block at +192 (no further lane rotation needed).
479 movd %xmm12,%rdx
480 movd %xmm7,%rcx
481 movd %xmm10,%r8
482 movd %xmm4,%r9
483 xorl 192(%rsi),%edx
484 xorl 196(%rsi),%ecx
485 xorl 200(%rsi),%r8d
486 xorl 204(%rsi),%r9d
487 movl %edx,192(%rdi)
488 movl %ecx,196(%rdi)
489 movl %r8d,200(%rdi)
490 movl %r9d,204(%rdi)
# Words 4..7 (xmm14, xmm0, xmm5, xmm8) -> byte offsets 16..31 of each block.
491 paddd 240(%rsp),%xmm14
492 paddd 64(%rsp),%xmm0
493 paddd 128(%rsp),%xmm5
494 paddd 192(%rsp),%xmm8
495 movd %xmm14,%rdx
496 movd %xmm0,%rcx
497 movd %xmm5,%r8
498 movd %xmm8,%r9
499 pshufd $0x39,%xmm14,%xmm14
500 pshufd $0x39,%xmm0,%xmm0
501 pshufd $0x39,%xmm5,%xmm5
502 pshufd $0x39,%xmm8,%xmm8
503 xorl 16(%rsi),%edx
504 xorl 20(%rsi),%ecx
505 xorl 24(%rsi),%r8d
506 xorl 28(%rsi),%r9d
507 movl %edx,16(%rdi)
508 movl %ecx,20(%rdi)
509 movl %r8d,24(%rdi)
510 movl %r9d,28(%rdi)
# Same words, block at +64.
511 movd %xmm14,%rdx
512 movd %xmm0,%rcx
513 movd %xmm5,%r8
514 movd %xmm8,%r9
515 pshufd $0x39,%xmm14,%xmm14
516 pshufd $0x39,%xmm0,%xmm0
517 pshufd $0x39,%xmm5,%xmm5
518 pshufd $0x39,%xmm8,%xmm8
519 xorl 80(%rsi),%edx
520 xorl 84(%rsi),%ecx
521 xorl 88(%rsi),%r8d
522 xorl 92(%rsi),%r9d
523 movl %edx,80(%rdi)
524 movl %ecx,84(%rdi)
525 movl %r8d,88(%rdi)
526 movl %r9d,92(%rdi)
# Same words, block at +128.
527 movd %xmm14,%rdx
528 movd %xmm0,%rcx
529 movd %xmm5,%r8
530 movd %xmm8,%r9
531 pshufd $0x39,%xmm14,%xmm14
532 pshufd $0x39,%xmm0,%xmm0
533 pshufd $0x39,%xmm5,%xmm5
534 pshufd $0x39,%xmm8,%xmm8
535 xorl 144(%rsi),%edx
536 xorl 148(%rsi),%ecx
537 xorl 152(%rsi),%r8d
538 xorl 156(%rsi),%r9d
539 movl %edx,144(%rdi)
540 movl %ecx,148(%rdi)
541 movl %r8d,152(%rdi)
542 movl %r9d,156(%rdi)
# Same words, block at +192.
543 movd %xmm14,%rdx
544 movd %xmm0,%rcx
545 movd %xmm5,%r8
546 movd %xmm8,%r9
547 xorl 208(%rsi),%edx
548 xorl 212(%rsi),%ecx
549 xorl 216(%rsi),%r8d
550 xorl 220(%rsi),%r9d
551 movl %edx,208(%rdi)
552 movl %ecx,212(%rdi)
553 movl %r8d,216(%rdi)
554 movl %r9d,220(%rdi)
# Words 8..11 (xmm15, xmm11, xmm1, xmm6) -> byte offsets 32..47 of each
# block. Words 8 and 9 get the per-block counter vectors from 288 and 304.
555 paddd 288(%rsp),%xmm15
556 paddd 304(%rsp),%xmm11
557 paddd 80(%rsp),%xmm1
558 paddd 144(%rsp),%xmm6
559 movd %xmm15,%rdx
560 movd %xmm11,%rcx
561 movd %xmm1,%r8
562 movd %xmm6,%r9
563 pshufd $0x39,%xmm15,%xmm15
564 pshufd $0x39,%xmm11,%xmm11
565 pshufd $0x39,%xmm1,%xmm1
566 pshufd $0x39,%xmm6,%xmm6
567 xorl 32(%rsi),%edx
568 xorl 36(%rsi),%ecx
569 xorl 40(%rsi),%r8d
570 xorl 44(%rsi),%r9d
571 movl %edx,32(%rdi)
572 movl %ecx,36(%rdi)
573 movl %r8d,40(%rdi)
574 movl %r9d,44(%rdi)
# Same words, block at +64.
575 movd %xmm15,%rdx
576 movd %xmm11,%rcx
577 movd %xmm1,%r8
578 movd %xmm6,%r9
579 pshufd $0x39,%xmm15,%xmm15
580 pshufd $0x39,%xmm11,%xmm11
581 pshufd $0x39,%xmm1,%xmm1
582 pshufd $0x39,%xmm6,%xmm6
583 xorl 96(%rsi),%edx
584 xorl 100(%rsi),%ecx
585 xorl 104(%rsi),%r8d
586 xorl 108(%rsi),%r9d
587 movl %edx,96(%rdi)
588 movl %ecx,100(%rdi)
589 movl %r8d,104(%rdi)
590 movl %r9d,108(%rdi)
# Same words, block at +128.
591 movd %xmm15,%rdx
592 movd %xmm11,%rcx
593 movd %xmm1,%r8
594 movd %xmm6,%r9
595 pshufd $0x39,%xmm15,%xmm15
596 pshufd $0x39,%xmm11,%xmm11
597 pshufd $0x39,%xmm1,%xmm1
598 pshufd $0x39,%xmm6,%xmm6
599 xorl 160(%rsi),%edx
600 xorl 164(%rsi),%ecx
601 xorl 168(%rsi),%r8d
602 xorl 172(%rsi),%r9d
603 movl %edx,160(%rdi)
604 movl %ecx,164(%rdi)
605 movl %r8d,168(%rdi)
606 movl %r9d,172(%rdi)
# Same words, block at +192.
607 movd %xmm15,%rdx
608 movd %xmm11,%rcx
609 movd %xmm1,%r8
610 movd %xmm6,%r9
611 xorl 224(%rsi),%edx
612 xorl 228(%rsi),%ecx
613 xorl 232(%rsi),%r8d
614 xorl 236(%rsi),%r9d
615 movl %edx,224(%rdi)
616 movl %ecx,228(%rdi)
617 movl %r8d,232(%rdi)
618 movl %r9d,236(%rdi)
# Words 12..15 (xmm13, xmm9, xmm3, xmm2) -> byte offsets 48..63 of each
# block.
619 paddd 160(%rsp),%xmm13
620 paddd 208(%rsp),%xmm9
621 paddd 256(%rsp),%xmm3
622 paddd 96(%rsp),%xmm2
623 movd %xmm13,%rdx
624 movd %xmm9,%rcx
625 movd %xmm3,%r8
626 movd %xmm2,%r9
627 pshufd $0x39,%xmm13,%xmm13
628 pshufd $0x39,%xmm9,%xmm9
629 pshufd $0x39,%xmm3,%xmm3
630 pshufd $0x39,%xmm2,%xmm2
631 xorl 48(%rsi),%edx
632 xorl 52(%rsi),%ecx
633 xorl 56(%rsi),%r8d
634 xorl 60(%rsi),%r9d
635 movl %edx,48(%rdi)
636 movl %ecx,52(%rdi)
637 movl %r8d,56(%rdi)
638 movl %r9d,60(%rdi)
# Same words, block at +64.
639 movd %xmm13,%rdx
640 movd %xmm9,%rcx
641 movd %xmm3,%r8
642 movd %xmm2,%r9
643 pshufd $0x39,%xmm13,%xmm13
644 pshufd $0x39,%xmm9,%xmm9
645 pshufd $0x39,%xmm3,%xmm3
646 pshufd $0x39,%xmm2,%xmm2
647 xorl 112(%rsi),%edx
648 xorl 116(%rsi),%ecx
649 xorl 120(%rsi),%r8d
650 xorl 124(%rsi),%r9d
651 movl %edx,112(%rdi)
652 movl %ecx,116(%rdi)
653 movl %r8d,120(%rdi)
654 movl %r9d,124(%rdi)
# Same words, block at +128. The lane rotation that follows is performed
# although the block at +192 is the last one read from these registers.
655 movd %xmm13,%rdx
656 movd %xmm9,%rcx
657 movd %xmm3,%r8
658 movd %xmm2,%r9
659 pshufd $0x39,%xmm13,%xmm13
660 pshufd $0x39,%xmm9,%xmm9
661 pshufd $0x39,%xmm3,%xmm3
662 pshufd $0x39,%xmm2,%xmm2
663 xorl 176(%rsi),%edx
664 xorl 180(%rsi),%ecx
665 xorl 184(%rsi),%r8d
666 xorl 188(%rsi),%r9d
667 movl %edx,176(%rdi)
668 movl %ecx,180(%rdi)
669 movl %r8d,184(%rdi)
670 movl %r9d,188(%rdi)
# Same words, block at +192.
671 movd %xmm13,%rdx
672 movd %xmm9,%rcx
673 movd %xmm3,%r8
674 movd %xmm2,%r9
675 xorl 240(%rsi),%edx
676 xorl 244(%rsi),%ecx
677 xorl 248(%rsi),%r8d
678 xorl 252(%rsi),%r9d
679 movl %edx,240(%rdi)
680 movl %ecx,244(%rdi)
681 movl %r8d,248(%rdi)
682 movl %r9d,252(%rdi)
# Reload the byte count, account for the 256 bytes just handled and advance
# both pointers.
683 movq 408(%rsp),%r9
684 sub $256,%r9
685 add $256,%rsi
686 add $256,%rdi
# Another full 4-block pass while at least 256 bytes remain.
687 cmp $256,%r9
688 jae ._bytesatleast256
# Exactly zero bytes left: finished. Otherwise fall through to the 1-block
# path for the remaining 1..255 bytes.
690 cmp $0,%r9
691 jbe ._done
# ._bytesbetween1and255 - set up one 64-byte block.
#
# Entered with r9 = bytes remaining (non-zero), rsi = source, rdi =
# destination. The block code always reads and writes a full 64 bytes, so a
# final partial block (r9 < 64) is redirected through the bounce buffer at
# 416(%rsp): the source bytes are copied in, both pointers are aimed at the
# buffer, and the real destination is remembered in rdx for the copy-out
# that follows the block store.
693 ._bytesbetween1and255:
694 cmp $64,%r9
695 jae ._nocopy
# rdx = real destination; rep movsb copies r9 bytes from rsi to the buffer.
697 mov %rdi,%rdx
698 leaq 416(%rsp),%rdi
699 mov %r9,%rcx
700 rep movsb
# rep movsb advanced rdi, so both pointers are reset to the buffer start.
701 leaq 416(%rsp),%rdi
702 leaq 416(%rsp),%rsi
# ._nocopy - load the four state rows for a single block.
# Slot 408 keeps the byte count because r9 is scratch during the store.
704 ._nocopy:
705 movq %r9,408(%rsp)
# xmm0 = constants row, xmm1..xmm3 = rows at 0, 16 and 32. xmm4 starts as a
# copy of xmm1 because the first step of ._mainloop2 adds xmm0 into xmm4.
706 movdqa 48(%rsp),%xmm0
707 movdqa 0(%rsp),%xmm1
708 movdqa 16(%rsp),%xmm2
709 movdqa 32(%rsp),%xmm3
710 movdqa %xmm1,%xmm4
# rcx = number of rounds; ._mainloop2 consumes four per iteration.
711 mov $20,%rcx
# ._mainloop2 - four cipher rounds on a single block.
#
# xmm0..xmm3 hold the four 16-byte state rows; xmm4, xmm5 and xmm6 are
# temporaries. Every step has the shape  row ^= rotl(rowA + rowB, r)  with
# r in 7, 9, 13, 18, where rotl is built from pslld r / psrld 32-r / two
# pxor. Between steps the rows are rotated by whole lanes (pshufd 0x93,
# 0x4e, 0x39) so that the words needed by the next step line up; the roles
# of xmm1 and xmm3 swap from one round to the next.
# On entry xmm4 must already hold a copy of xmm1. rcx counts remaining
# rounds: each pass performs four and the loop repeats while the result of
# the subtraction is above zero (5 passes for 20 rounds).
713 ._mainloop2:
# Round A: xmm3 ^= rotl(xmm1 + xmm0, 7)
714 paddd %xmm0,%xmm4
715 movdqa %xmm0,%xmm5
716 movdqa %xmm4,%xmm6
717 pslld $7,%xmm4
718 psrld $25,%xmm6
719 pxor %xmm4,%xmm3
720 pxor %xmm6,%xmm3
# xmm2 ^= rotl(xmm0 + xmm3, 9); xmm3 is copied to xmm4 before its lanes
# are rotated.
721 paddd %xmm3,%xmm5
722 movdqa %xmm3,%xmm4
723 movdqa %xmm5,%xmm6
724 pslld $9,%xmm5
725 psrld $23,%xmm6
726 pxor %xmm5,%xmm2
727 pshufd $0x93,%xmm3,%xmm3
728 pxor %xmm6,%xmm2
# xmm1 ^= rotl(xmm3 + xmm2, 13), using the unrotated xmm3 copy.
729 paddd %xmm2,%xmm4
730 movdqa %xmm2,%xmm5
731 movdqa %xmm4,%xmm6
732 pslld $13,%xmm4
733 psrld $19,%xmm6
734 pxor %xmm4,%xmm1
735 pshufd $0x4e,%xmm2,%xmm2
736 pxor %xmm6,%xmm1
# xmm0 ^= rotl(xmm2 + xmm1, 18), using the unrotated xmm2 copy.
737 paddd %xmm1,%xmm5
738 movdqa %xmm3,%xmm4
739 movdqa %xmm5,%xmm6
740 pslld $18,%xmm5
741 psrld $14,%xmm6
742 pxor %xmm5,%xmm0
743 pshufd $0x39,%xmm1,%xmm1
744 pxor %xmm6,%xmm0
# Round B (xmm1 and xmm3 swap roles): xmm1 ^= rotl(xmm3 + xmm0, 7)
745 paddd %xmm0,%xmm4
746 movdqa %xmm0,%xmm5
747 movdqa %xmm4,%xmm6
748 pslld $7,%xmm4
749 psrld $25,%xmm6
750 pxor %xmm4,%xmm1
751 pxor %xmm6,%xmm1
# xmm2 ^= rotl(xmm0 + xmm1, 9)
752 paddd %xmm1,%xmm5
753 movdqa %xmm1,%xmm4
754 movdqa %xmm5,%xmm6
755 pslld $9,%xmm5
756 psrld $23,%xmm6
757 pxor %xmm5,%xmm2
758 pshufd $0x93,%xmm1,%xmm1
759 pxor %xmm6,%xmm2
# xmm3 ^= rotl(xmm1 + xmm2, 13)
760 paddd %xmm2,%xmm4
761 movdqa %xmm2,%xmm5
762 movdqa %xmm4,%xmm6
763 pslld $13,%xmm4
764 psrld $19,%xmm6
765 pxor %xmm4,%xmm3
766 pshufd $0x4e,%xmm2,%xmm2
767 pxor %xmm6,%xmm3
# xmm0 ^= rotl(xmm2 + xmm3, 18)
768 paddd %xmm3,%xmm5
769 movdqa %xmm1,%xmm4
770 movdqa %xmm5,%xmm6
771 pslld $18,%xmm5
772 psrld $14,%xmm6
773 pxor %xmm5,%xmm0
774 pshufd $0x39,%xmm3,%xmm3
775 pxor %xmm6,%xmm0
# Round C (same shape as round A): xmm3 ^= rotl(xmm1 + xmm0, 7)
776 paddd %xmm0,%xmm4
777 movdqa %xmm0,%xmm5
778 movdqa %xmm4,%xmm6
779 pslld $7,%xmm4
780 psrld $25,%xmm6
781 pxor %xmm4,%xmm3
782 pxor %xmm6,%xmm3
# xmm2 ^= rotl(xmm0 + xmm3, 9)
783 paddd %xmm3,%xmm5
784 movdqa %xmm3,%xmm4
785 movdqa %xmm5,%xmm6
786 pslld $9,%xmm5
787 psrld $23,%xmm6
788 pxor %xmm5,%xmm2
789 pshufd $0x93,%xmm3,%xmm3
790 pxor %xmm6,%xmm2
# xmm1 ^= rotl(xmm3 + xmm2, 13)
791 paddd %xmm2,%xmm4
792 movdqa %xmm2,%xmm5
793 movdqa %xmm4,%xmm6
794 pslld $13,%xmm4
795 psrld $19,%xmm6
796 pxor %xmm4,%xmm1
797 pshufd $0x4e,%xmm2,%xmm2
798 pxor %xmm6,%xmm1
# xmm0 ^= rotl(xmm2 + xmm1, 18)
799 paddd %xmm1,%xmm5
800 movdqa %xmm3,%xmm4
801 movdqa %xmm5,%xmm6
802 pslld $18,%xmm5
803 psrld $14,%xmm6
804 pxor %xmm5,%xmm0
805 pshufd $0x39,%xmm1,%xmm1
806 pxor %xmm6,%xmm0
# Round D (same shape as round B): xmm1 ^= rotl(xmm3 + xmm0, 7)
807 paddd %xmm0,%xmm4
808 movdqa %xmm0,%xmm5
809 movdqa %xmm4,%xmm6
810 pslld $7,%xmm4
811 psrld $25,%xmm6
812 pxor %xmm4,%xmm1
813 pxor %xmm6,%xmm1
# xmm2 ^= rotl(xmm0 + xmm1, 9)
814 paddd %xmm1,%xmm5
815 movdqa %xmm1,%xmm4
816 movdqa %xmm5,%xmm6
817 pslld $9,%xmm5
818 psrld $23,%xmm6
819 pxor %xmm5,%xmm2
820 pshufd $0x93,%xmm1,%xmm1
821 pxor %xmm6,%xmm2
# xmm3 ^= rotl(xmm1 + xmm2, 13)
822 paddd %xmm2,%xmm4
823 movdqa %xmm2,%xmm5
824 movdqa %xmm4,%xmm6
825 pslld $13,%xmm4
826 psrld $19,%xmm6
827 pxor %xmm4,%xmm3
828 pshufd $0x4e,%xmm2,%xmm2
829 pxor %xmm6,%xmm3
# The round counter is decremented early; the SSE instructions between this
# sub and the ja below do not modify the flags.
830 sub $4,%rcx
# xmm0 ^= rotl(xmm2 + xmm3, 18). xmm4 is refreshed with xmm1 for the first
# step of the next pass.
831 paddd %xmm3,%xmm5
832 movdqa %xmm1,%xmm4
833 movdqa %xmm5,%xmm6
834 pslld $18,%xmm5
# xmm7 is cleared here; no later instruction on the 1-block path reads it.
835 pxor %xmm7,%xmm7
836 psrld $14,%xmm6
837 pxor %xmm5,%xmm0
838 pshufd $0x39,%xmm3,%xmm3
839 pxor %xmm6,%xmm0
840 ja ._mainloop2
# Finish a single block: add the original state rows back in, XOR the 64
# keystream bytes with the source at rsi, store them at rdi, then advance
# the block counter and decide how to continue.
#
# The rows are stored in a rotated layout, so the lanes of each register map
# to scattered byte offsets inside the block. Lane 0 is extracted with movd
# (only the low 32 bits of the destination are used) and pshufd 0x39 moves
# the next lane into position.
#   xmm0 lanes -> offsets 0, 20, 40, 60
#   xmm1 lanes -> offsets 48, 4, 24, 44
#   xmm2 lanes -> offsets 32, 52, 8, 28
#   xmm3 lanes -> offsets 16, 36, 56, 12
# rcx, r8, r9 and rax are scratch; rdx is left untouched because it may hold
# the real destination pointer for a partial final block.
842 paddd 48(%rsp),%xmm0
843 paddd 0(%rsp),%xmm1
844 paddd 16(%rsp),%xmm2
845 paddd 32(%rsp),%xmm3
# Lane 0 of each row.
846 movd %xmm0,%rcx
847 movd %xmm1,%r8
848 movd %xmm2,%r9
849 movd %xmm3,%rax
850 pshufd $0x39,%xmm0,%xmm0
851 pshufd $0x39,%xmm1,%xmm1
852 pshufd $0x39,%xmm2,%xmm2
853 pshufd $0x39,%xmm3,%xmm3
854 xorl 0(%rsi),%ecx
855 xorl 48(%rsi),%r8d
856 xorl 32(%rsi),%r9d
857 xorl 16(%rsi),%eax
858 movl %ecx,0(%rdi)
859 movl %r8d,48(%rdi)
860 movl %r9d,32(%rdi)
861 movl %eax,16(%rdi)
# Lane 1 of each row.
862 movd %xmm0,%rcx
863 movd %xmm1,%r8
864 movd %xmm2,%r9
865 movd %xmm3,%rax
866 pshufd $0x39,%xmm0,%xmm0
867 pshufd $0x39,%xmm1,%xmm1
868 pshufd $0x39,%xmm2,%xmm2
869 pshufd $0x39,%xmm3,%xmm3
870 xorl 20(%rsi),%ecx
871 xorl 4(%rsi),%r8d
872 xorl 52(%rsi),%r9d
873 xorl 36(%rsi),%eax
874 movl %ecx,20(%rdi)
875 movl %r8d,4(%rdi)
876 movl %r9d,52(%rdi)
877 movl %eax,36(%rdi)
# Lane 2 of each row.
878 movd %xmm0,%rcx
879 movd %xmm1,%r8
880 movd %xmm2,%r9
881 movd %xmm3,%rax
882 pshufd $0x39,%xmm0,%xmm0
883 pshufd $0x39,%xmm1,%xmm1
884 pshufd $0x39,%xmm2,%xmm2
885 pshufd $0x39,%xmm3,%xmm3
886 xorl 40(%rsi),%ecx
887 xorl 24(%rsi),%r8d
888 xorl 8(%rsi),%r9d
889 xorl 56(%rsi),%eax
890 movl %ecx,40(%rdi)
891 movl %r8d,24(%rdi)
892 movl %r9d,8(%rdi)
893 movl %eax,56(%rdi)
# Lane 3 of each row.
894 movd %xmm0,%rcx
895 movd %xmm1,%r8
896 movd %xmm2,%r9
897 movd %xmm3,%rax
898 xorl 60(%rsi),%ecx
899 xorl 44(%rsi),%r8d
900 xorl 28(%rsi),%r9d
901 xorl 12(%rsi),%eax
902 movl %ecx,60(%rdi)
903 movl %r8d,44(%rdi)
904 movl %r9d,28(%rdi)
905 movl %eax,12(%rdi)
# Restore the byte count that was parked before the rounds.
906 movq 408(%rsp),%r9
# Increment the 64-bit block counter: low word at 16(%rsp), high word at
# 4+32(%rsp). rcx = ((high << 32) | low) + 1, then split and store back.
907 movl 16(%rsp),%ecx
908 movl 4+32(%rsp),%r8d
909 add $1,%rcx
910 shl $32,%r8
911 add %r8,%rcx
912 mov %rcx,%r8
913 shr $32,%r8
914 movl %ecx,16(%rsp)
915 movl %r8d,4+32(%rsp)
# Three-way dispatch on one compare: more than 64 bytes -> next block,
# exactly 64 -> done, fewer than 64 -> copy the partial block out.
916 cmp $64,%r9
918 ja ._bytesatleast65
920 jae ._bytesatleast64
# Partial final block: the result sits in the bounce buffer (rdi points at
# it) and rdx holds the real destination saved at ._bytesbetween1and255.
# Copy only the r9 bytes that were requested.
922 mov %rdi,%rsi
923 mov %rdx,%rdi
924 mov %r9,%rcx
925 rep movsb
# ._bytesatleast64 / ._done - common exit for both entry points.
#
# Reached when the byte count was zero on entry, when the last 64-byte
# block has been written, or by falling through after the partial-block
# copy-out. Restores the registers saved by the prologue, releases the
# frame by adding the saved adjustment (slot 352) back to rsp, and returns
# with rax = 0 (rdx is cleared as well).
#
# Review fix: the ret that terminates the routine was missing from this
# listing (the embedded numbering skips from 938 to 941). Without it,
# execution runs on into ._bytesatleast65 after the frame has been
# released.
927 ._bytesatleast64:
928 ._done:
929 movq 352(%rsp),%r11
930 movq 360(%rsp),%r12
931 movq 368(%rsp),%r13
932 movq 376(%rsp),%r14
933 movq 384(%rsp),%r15
934 movq 392(%rsp),%rbx
935 movq 400(%rsp),%rbp
# r11 holds the exact amount subtracted in the prologue.
936 add %r11,%rsp
937 xor %rax,%rax
938 xor %rdx,%rdx
939 ret
# ._bytesatleast65 - more than one block remained after the block just
# written. Account for its 64 bytes, advance both pointers and loop back to
# the 1-block path, which re-checks whether the next block is partial.
941 ._bytesatleast65:
942 sub $64,%r9
943 add $64,%rdi
944 add $64,%rsi
945 jmp ._bytesbetween1and255
947 #endif