Added assembly for macosx
[gnutls.git] / lib / accelerated / x86 / asm-macosx / padlock-x86-64-macosx.s
blob a19e824ad353ca0ba60e47083423b580b0cb178b
1 # Copyright (c) 2011, Andy Polyakov by <appro@openssl.org>
2 # All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions
6 # are met:
7 #
8 # * Redistributions of source code must retain copyright notices,
9 # this list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following
13 # disclaimer in the documentation and/or other materials
14 # provided with the distribution.
16 # * Neither the name of the Andy Polyakov nor the names of its
17 # copyright holder and contributors may be used to endorse or
18 # promote products derived from this software without specific
19 # prior written permission.
21 # ALTERNATIVELY, provided that this notice is retained in full, this
22 # product may be distributed under the terms of the GNU General Public
23 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
24 # those given above.
26 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
27 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ----------------------------------------------------------------------
# _padlock_capability
#
# Takes no arguments.  Returns a capability word in %eax:
#   * 0 when CPUID leaf 0 does not report the vendor string
#     CentaurHauls (ebx = Cent, edx = aurH, ecx = auls);
#   * 0 when CPUID leaf 0xC0000000 reports a highest leaf below
#     0xC0000001;
#   * otherwise the %edx value of CPUID leaf 0xC0000001 with bit 4
#     forced to one (it is cleared by the and, then set by the or).
#
# cpuid overwrites %rbx, which is callee-saved, so it is parked in %r8
# for the duration.  Clobbers %rcx, %rdx, %r8 and flags.
# ----------------------------------------------------------------------
	.text
	.globl	_padlock_capability
	.p2align	4
_padlock_capability:
	movq	%rbx,%r8
	xorl	%eax,%eax
	cpuid
# Preload the failure result; every mismatch below bails out with it.
	xorl	%eax,%eax
	cmpl	$0x746e6543,%ebx
	jne	L$noluck
	cmpl	$0x48727561,%edx
	jne	L$noluck
	cmpl	$0x736c7561,%ecx
	jne	L$noluck
# Vendor matched: ask for the highest extended leaf in the 0xC000xxxx range.
	movl	$0xc0000000,%eax
	cpuid
	movl	%eax,%edx
	xorl	%eax,%eax
# Unsigned compare: leaf 0xC0000001 must exist.
	cmpl	$0xc0000001,%edx
	jb	L$noluck
	movl	$0xc0000001,%eax
	cpuid
	movl	%edx,%eax
	andl	$0xffffffef,%eax
	orl	$16,%eax
L$noluck:
	movq	%r8,%rbx
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_key_bswap
#
# In:  %rdi = buffer of 32-bit words that also holds a 32-bit count at
#             byte offset 240.
# Byte-swaps, in place, that many consecutive 32-bit words starting at
# (%rdi).  The count is read once, before any word is swapped.
# A count of zero now returns immediately; previously the decrement
# wrapped around and the loop ran 2^32 times, writing far past the
# buffer.
# NOTE(review): whether the value at offset 240 really equals the
# number of words that must be swapped is decided by the caller and is
# not visible here -- confirm.
# Clobbers %rax, %rdx, %rdi and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_key_bswap
	.p2align	4
_padlock_key_bswap:
	movl	240(%rdi),%edx
# Nothing to swap: leave the buffer untouched.
	testl	%edx,%edx
	jz	L$bswap_done
L$bswap_loop:
	movl	(%rdi),%eax
	bswapl	%eax
	movl	%eax,(%rdi)
	leaq	4(%rdi),%rdi
	subl	$1,%edx
	jnz	L$bswap_loop
L$bswap_done:
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_verify_context
#
# In:  %rdi = context pointer.
# Exported wrapper around _padlock_verify_ctx.  It sets up what the
# helper expects: the context pointer in %rdx, the address of
# L$padlock_saved_context in %rax, and a copy of the flags pushed on the
# stack so that the helper finds it at 8(%rsp), just above its return
# address.  The pushed flags are discarded again after the call.
# Clobbers %rax, %rdx, %r8 and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_verify_context
	.p2align	4
_padlock_verify_context:
	movq	%rdi,%rdx
	pushf
	leaq	L$padlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
# Drop the flags image pushed above without popping it into EFLAGS.
	leaq	8(%rsp),%rsp
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_verify_ctx  (file-local helper, not exported)
#
# In:  %rdx    = context pointer to verify
#      %rax    = address of the 8-byte saved-context slot
#      8(%rsp) = flags image the caller pushed with pushf right before
#                the call
# Out: the slot at (%rax) always ends up holding %rdx.
#
# When bit 30 of the pushed flags is set and the slot held a different
# pointer, a pushf/popf pair is executed first.  That is the same
# sequence _padlock_reload_key consists of.
# NOTE(review): presumably rewriting EFLAGS is what makes the PadLock
# unit reload its key material -- confirm against the VIA guide.
# Clobbers %r8 and flags.
# ----------------------------------------------------------------------
	.p2align	4
_padlock_verify_ctx:
	movq	8(%rsp),%r8
	btq	$30,%r8
	jnc	L$verified
	cmpq	(%rax),%rdx
	je	L$verified
	pushf
	popf
L$verified:
	movq	%rdx,(%rax)
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_reload_key
#
# Takes no arguments and returns nothing.  Pushes the flags register and
# pops it straight back, i.e. rewrites EFLAGS with its current value.
# NOTE(review): going by the name, this write is what triggers a key
# reload in the PadLock unit -- confirm against the VIA guide.
# ----------------------------------------------------------------------
	.globl	_padlock_reload_key
	.p2align	4
_padlock_reload_key:
	pushf
	popf
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_aes_block
#
# In:  %rdi, %rsi = handed to the instruction exactly as received
#      %rdx       = context base address
# Executes the opcode bytes f3 0f a7 c8 (PadLock REP XCRYPTECB) with
#      %rcx = 1, %rdx = context + 16, %rbx = context + 32.
# NOTE(review): presumably %rdi is the output block and %rsi the input
# block, with the control word at +16 and the key at +32 -- confirm
# against the caller and the VIA guide.
#
# %rbx is callee-saved, so it is kept in %r8 around the instruction.
# Clobbers %rcx, %rdx, %r8 plus whatever the instruction updates.
# ----------------------------------------------------------------------
	.globl	_padlock_aes_block
	.p2align	4
_padlock_aes_block:
	movq	%rbx,%r8
	movq	$1,%rcx
	leaq	32(%rdx),%rbx
	leaq	16(%rdx),%rdx
	.byte	0xf3,0x0f,0xa7,0xc8
	movq	%r8,%rbx
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_xstore
#
# In:  %rdi = handed to the instruction exactly as received
#      %esi = 32-bit value placed in %edx for the instruction
# Executes the opcode bytes 0f a7 c0 (PadLock XSTORE) and returns with
# whatever the instruction leaves in %eax.
# NOTE(review): presumably %rdi is the destination buffer and %edx a
# quality/divisor selector -- confirm against the VIA guide.
# ----------------------------------------------------------------------
	.globl	_padlock_xstore
	.p2align	4
_padlock_xstore:
	movl	%esi,%edx
	.byte	0x0f,0xa7,0xc0
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_sha1_oneshot
#
# In:  %rdi = hash state, first 20 bytes are read and written back
#             (unaligned accesses, so no alignment is required)
#      %rsi = handed to the instruction exactly as received
#      %rdx = third argument, moved into %rcx for the instruction
# NOTE(review): presumably %rsi is the message pointer and %rcx its
# length -- confirm against the caller and the VIA guide.
#
# The state is staged in a 136-byte stack scratch area.  With an
# ABI-conforming caller %rsp is 8 mod 16 on entry, so the scratch area
# is 16-byte aligned, which the movaps accesses require.
# The opcode bytes f3 0f a6 c8 (PadLock REP XSHA1) run with %rdi
# pointing at the scratch area and %rax = 0.
# Clobbers %rax, %rcx, %rdx, %rdi, %xmm0 and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_sha1_oneshot
	.p2align	4
_padlock_sha1_oneshot:
	movq	%rdx,%rcx
# Remember the caller state pointer; %rdi is about to be redirected.
	movq	%rdi,%rdx
	movups	(%rdi),%xmm0
	subq	$136,%rsp
	movl	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi
	movl	%eax,16(%rsp)
	xorq	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xc8
# Copy the 20-byte result from the scratch area back to the caller.
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$136,%rsp
	movups	%xmm0,(%rdx)
	movl	%eax,16(%rdx)
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_sha1_blocks
#
# In:  %rdi = hash state, first 20 bytes are read and written back
#             (unaligned accesses, so no alignment is required)
#      %rsi = handed to the instruction exactly as received
#      %rdx = third argument, moved into %rcx for the instruction
# NOTE(review): presumably %rsi is the message pointer and %rcx a block
# count -- confirm against the caller and the VIA guide.
#
# Same staging as _padlock_sha1_oneshot, but the opcode bytes
# f3 0f a6 c8 (PadLock REP XSHA1) run with %rax = -1 instead of 0.
# The 136-byte stack scratch area is 16-byte aligned when the caller
# followed the ABI (entry %rsp is 8 mod 16), as movaps requires.
# Clobbers %rax, %rcx, %rdx, %rdi, %xmm0 and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_sha1_blocks
	.p2align	4
_padlock_sha1_blocks:
	movq	%rdx,%rcx
# Remember the caller state pointer; %rdi is about to be redirected.
	movq	%rdi,%rdx
	movups	(%rdi),%xmm0
	subq	$136,%rsp
	movl	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi
	movl	%eax,16(%rsp)
	movq	$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xc8
# Copy the 20-byte result from the scratch area back to the caller.
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$136,%rsp
	movups	%xmm0,(%rdx)
	movl	%eax,16(%rdx)
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_sha256_oneshot
#
# In:  %rdi = hash state, first 32 bytes are read and written back
#             (unaligned accesses, so no alignment is required)
#      %rsi = handed to the instruction exactly as received
#      %rdx = third argument, moved into %rcx for the instruction
# NOTE(review): presumably %rsi is the message pointer and %rcx its
# length -- confirm against the caller and the VIA guide.
#
# The state is staged in a 136-byte stack scratch area that is 16-byte
# aligned when the caller followed the ABI (entry %rsp is 8 mod 16), as
# the movaps accesses require.  The opcode bytes f3 0f a6 d0 (PadLock
# REP XSHA256) run with %rdi pointing at the scratch area and %rax = 0.
# Clobbers %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1 and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_sha256_oneshot
	.p2align	4
_padlock_sha256_oneshot:
	movq	%rdx,%rcx
# Remember the caller state pointer; %rdi is about to be redirected.
	movq	%rdi,%rdx
	movups	(%rdi),%xmm0
	subq	$136,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	xorq	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xd0
# Copy the 32-byte result from the scratch area back to the caller.
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$136,%rsp
	movups	%xmm0,(%rdx)
	movups	%xmm1,16(%rdx)
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_sha256_blocks
#
# In:  %rdi = hash state, first 32 bytes are read and written back
#             (unaligned accesses, so no alignment is required)
#      %rsi = handed to the instruction exactly as received
#      %rdx = third argument, moved into %rcx for the instruction
# NOTE(review): presumably %rsi is the message pointer and %rcx a block
# count -- confirm against the caller and the VIA guide.
#
# Same staging as _padlock_sha256_oneshot, but the opcode bytes
# f3 0f a6 d0 (PadLock REP XSHA256) run with %rax = -1 instead of 0.
# The 136-byte stack scratch area is 16-byte aligned when the caller
# followed the ABI (entry %rsp is 8 mod 16), as movaps requires.
# Clobbers %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1 and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_sha256_blocks
	.p2align	4
_padlock_sha256_blocks:
	movq	%rdx,%rcx
# Remember the caller state pointer; %rdi is about to be redirected.
	movq	%rdi,%rdx
	movups	(%rdi),%xmm0
	subq	$136,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	movq	$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xd0
# Copy the 32-byte result from the scratch area back to the caller.
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$136,%rsp
	movups	%xmm0,(%rdx)
	movups	%xmm1,16(%rdx)
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_sha512_blocks
#
# In:  %rdi = hash state, first 64 bytes are read and written back
#             (unaligned accesses, so no alignment is required)
#      %rsi = handed to the instruction exactly as received
#      %rdx = third argument, moved into %rcx for the instruction
# NOTE(review): presumably %rsi is the message pointer and %rcx a block
# count -- confirm against the caller and the VIA guide.
#
# The state is staged in a 136-byte stack scratch area that is 16-byte
# aligned when the caller followed the ABI (entry %rsp is 8 mod 16), as
# the movaps accesses require.  The opcode bytes f3 0f a6 e0 are then
# executed with %rdi pointing at the scratch area.  Unlike the SHA-1 and
# SHA-256 routines in this file, %rax is not initialised beforehand.
# NOTE(review): f3 0f a6 e0 is presumably the PadLock SHA-512
# instruction -- confirm the mnemonic against the VIA guide.
# Clobbers %rcx, %rdx, %rdi, %xmm0-%xmm3 and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_sha512_blocks
	.p2align	4
_padlock_sha512_blocks:
	movq	%rdx,%rcx
# Remember the caller state pointer; %rdi is about to be redirected.
	movq	%rdi,%rdx
	movups	(%rdi),%xmm0
	subq	$136,%rsp
	movups	16(%rdi),%xmm1
	movups	32(%rdi),%xmm2
	movups	48(%rdi),%xmm3
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
	.byte	0xf3,0x0f,0xa6,0xe0
# Copy the 64-byte result from the scratch area back to the caller.
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	addq	$136,%rsp
	movups	%xmm0,(%rdx)
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_ecb_encrypt
#
# In:  %rdi = output pointer (any alignment)
#      %rsi = input pointer (any alignment)
#      %rdx = context, must be 16-byte aligned
#      %rcx = length in bytes, must be a multiple of 16
# Out: %eax = 1 on success, 0 when the context or the length fails the
#      16-byte check (nothing is processed in that case).
#
# A length of zero returns 1 immediately.  Without that guard the
# short-input path below copies one 16-byte block regardless, reading
# past the input and overwriting the pushed flags and the saved %rbx on
# the stack, so the caller got a corrupted callee-saved register back.
#
# Register set-up for the opcode bytes f3 0f a7 c8 (PadLock REP
# XCRYPTECB): %rax = context, %rdx = context + 16, %rbx = context + 32,
# %rcx = number of 16-byte blocks, %rsi = source, %rdi = destination.
#
# Paths:
#   * length <= 128: the input is first copied to an aligned stack
#     buffer of exactly that length (L$ecb_short), then processed by the
#     chunk loop.
#   * bit 5 of the dword at context + 16 set, or both pointers 16-byte
#     aligned: the whole buffer is passed to the instruction in one go
#     (L$ecb_aligned).
#   * otherwise: the chunk loop (L$ecb_loop) processes up to 512 bytes
#     per iteration.  A misaligned input chunk is first copied into the
#     destination of that iteration and processed in place; a misaligned
#     output is produced in a stack buffer and copied out afterwards.
# Any stack buffer is overwritten with zeros before returning.
#
# The rep movsq copies (f3 48 a5) rely on the direction flag being
# clear, which the System V ABI guarantees at function entry.
# After the two pushes and the pushf, %rsp is 16-byte aligned for an
# ABI-conforming caller, so the movaps stack accesses are legal.
# Clobbers %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0 and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_ecb_encrypt
	.p2align	4
_padlock_ecb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax
	testq	$15,%rdx
	jnz	L$ecb_abort
	testq	$15,%rcx
	jnz	L$ecb_abort
# Zero length: report success without touching data, context or stack.
# L$ecb_abort is simply the shared epilogue; %eax already holds 1.
	movl	$1,%eax
	testq	%rcx,%rcx
	jz	L$ecb_abort
	leaq	L$padlock_saved_context(%rip),%rax
# The helper reads this flags image at 8(%rsp).
	pushf
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	cmpq	$128,%rcx
	jbe	L$ecb_short
	testl	$32,(%rdx)
	jnz	L$ecb_aligned
# al = output aligned, bl = input aligned; both set means no staging.
	testq	$15,%rdi
	setz	%al
	testq	$15,%rsi
	setz	%bl
	testl	%ebx,%eax
	jnz	L$ecb_aligned
# rax becomes -1 when the output is misaligned, 0 otherwise, and is then
# turned into minus the stack buffer size: min(length, 512) or nothing.
	negq	%rax
	movq	$512,%rbx
	notq	%rax
	leaq	(%rsp),%rbp
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx
	andq	%rbx,%rax
	movq	%rcx,%rbx
	negq	%rax
# First chunk size is length mod 512; later chunks are 512 bytes.
	andq	$511,%rbx
	leaq	(%rax,%rbp,1),%rsp
	jmp	L$ecb_loop
	.p2align	4
L$ecb_loop:
# rbx = bytes in this chunk, capped at the bytes remaining in rcx.
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx
# Park out, inp, remaining length and chunk size across the instruction.
	movq	%rdi,%r8
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
# Misaligned output: let the instruction write to the stack buffer.
	testq	$15,%rdi
	cmovnzq	%rsp,%rdi
	testq	$15,%rsi
	jz	L$ecb_inp_aligned
# Misaligned input: rep movsq it into the destination, then rewind the
# destination pointer and process the chunk in place.
	shrq	$3,%rcx
	.byte	0xf3,0x48,0xa5
	subq	%rbx,%rdi
	movq	%rbx,%rcx
	movq	%rdi,%rsi
L$ecb_inp_aligned:
	leaq	-16(%rdx),%rax
	leaq	16(%rdx),%rbx
	shrq	$4,%rcx
	.byte	0xf3,0x0f,0xa7,0xc8
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$15,%rdi
	jz	L$ecb_out_aligned
# Misaligned output: rep movsq the result from the stack buffer to it.
	movq	%rbx,%rcx
	shrq	$3,%rcx
	leaq	(%rsp),%rsi
	.byte	0xf3,0x48,0xa5
	subq	%rbx,%rdi
L$ecb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi
	addq	%rbx,%rsi
	subq	%rbx,%rcx
# mov leaves the flags of the subtraction intact for the jnz below.
	movq	$512,%rbx
	jnz	L$ecb_loop

# No stack buffer was allocated when rsp still equals rbp.
	cmpq	%rsp,%rbp
	je	L$ecb_done

# Wipe the stack buffer from rsp up to rbp, 16 bytes at a time.
	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
L$ecb_bzero:
	movaps	%xmm0,(%rax)
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	L$ecb_bzero

L$ecb_done:
	leaq	(%rbp),%rsp
	jmp	L$ecb_exit
	.p2align	4
L$ecb_short:
# Reserve exactly rcx bytes (non-zero multiple of 16, at most 128).
	movq	%rsp,%rbp
	subq	%rcx,%rsp
	xorq	%rbx,%rbx
L$ecb_short_copy:
	movups	(%rsi,%rbx,1),%xmm0
	leaq	16(%rbx),%rbx
# The compare feeds the ja below; lea and movaps do not alter flags.
	cmpq	%rbx,%rcx
	movaps	%xmm0,-16(%rsp,%rbx,1)
	ja	L$ecb_short_copy
	movq	%rsp,%rsi
	movq	%rcx,%rbx
	jmp	L$ecb_loop
	.p2align	4
L$ecb_aligned:
	leaq	-16(%rdx),%rax
	leaq	16(%rdx),%rbx
	shrq	$4,%rcx
	.byte	0xf3,0x0f,0xa7,0xc8
L$ecb_exit:
	movl	$1,%eax
# Drop the flags image pushed before the helper call.
	leaq	8(%rsp),%rsp
L$ecb_abort:
	popq	%rbx
	popq	%rbp
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# ----------------------------------------------------------------------
# _padlock_cbc_encrypt
#
# In:  %rdi = output pointer (any alignment)
#      %rsi = input pointer (any alignment)
#      %rdx = context, must be 16-byte aligned
#      %rcx = length in bytes, must be a multiple of 16
# Out: %eax = 1 on success, 0 when the context or the length fails the
#      16-byte check (nothing is processed in that case).
#
# A length of zero returns 1 immediately.  Without that guard the
# short-input path below copies one 16-byte block regardless, reading
# past the input and overwriting the pushed flags and the saved %rbx on
# the stack, so the caller got a corrupted callee-saved register back.
#
# Register set-up for the opcode bytes f3 0f a7 d0 (PadLock REP
# XCRYPTCBC): %rax = context, %rdx = context + 16, %rbx = context + 32,
# %rcx = number of 16-byte blocks, %rsi = source, %rdi = destination.
# After each run the 16 bytes %rax then points at are copied to the
# first 16 bytes of the context.
# NOTE(review): presumably that is the chaining value carried over to
# the next chunk or call -- confirm against the VIA guide.
#
# Paths:
#   * length <= 64: the input is first copied to an aligned stack
#     buffer of exactly that length (L$cbc_short), then processed by the
#     chunk loop.
#   * bit 5 of the dword at context + 16 set, or both pointers 16-byte
#     aligned: the whole buffer is passed to the instruction in one go
#     (L$cbc_aligned).
#   * otherwise: the chunk loop (L$cbc_loop) processes up to 512 bytes
#     per iteration.  A misaligned input chunk is first copied into the
#     destination of that iteration and processed in place; a misaligned
#     output is produced in a stack buffer and copied out afterwards.
# Any stack buffer is overwritten with zeros before returning.
#
# The rep movsq copies (f3 48 a5) rely on the direction flag being
# clear, which the System V ABI guarantees at function entry.
# After the two pushes and the pushf, %rsp is 16-byte aligned for an
# ABI-conforming caller, so the movaps stack accesses are legal.
# Clobbers %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0 and flags.
# ----------------------------------------------------------------------
	.globl	_padlock_cbc_encrypt
	.p2align	4
_padlock_cbc_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax
	testq	$15,%rdx
	jnz	L$cbc_abort
	testq	$15,%rcx
	jnz	L$cbc_abort
# Zero length: report success without touching data, context or stack.
# L$cbc_abort is simply the shared epilogue; %eax already holds 1.
	movl	$1,%eax
	testq	%rcx,%rcx
	jz	L$cbc_abort
	leaq	L$padlock_saved_context(%rip),%rax
# The helper reads this flags image at 8(%rsp).
	pushf
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	cmpq	$64,%rcx
	jbe	L$cbc_short
	testl	$32,(%rdx)
	jnz	L$cbc_aligned
# al = output aligned, bl = input aligned; both set means no staging.
	testq	$15,%rdi
	setz	%al
	testq	$15,%rsi
	setz	%bl
	testl	%ebx,%eax
	jnz	L$cbc_aligned
# rax becomes -1 when the output is misaligned, 0 otherwise, and is then
# turned into minus the stack buffer size: min(length, 512) or nothing.
	negq	%rax
	movq	$512,%rbx
	notq	%rax
	leaq	(%rsp),%rbp
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx
	andq	%rbx,%rax
	movq	%rcx,%rbx
	negq	%rax
# First chunk size is length mod 512; later chunks are 512 bytes.
	andq	$511,%rbx
	leaq	(%rax,%rbp,1),%rsp
	jmp	L$cbc_loop
	.p2align	4
L$cbc_loop:
# rbx = bytes in this chunk, capped at the bytes remaining in rcx.
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx
# Park out, inp, remaining length and chunk size across the instruction.
	movq	%rdi,%r8
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
# Misaligned output: let the instruction write to the stack buffer.
	testq	$15,%rdi
	cmovnzq	%rsp,%rdi
	testq	$15,%rsi
	jz	L$cbc_inp_aligned
# Misaligned input: rep movsq it into the destination, then rewind the
# destination pointer and process the chunk in place.
	shrq	$3,%rcx
	.byte	0xf3,0x48,0xa5
	subq	%rbx,%rdi
	movq	%rbx,%rcx
	movq	%rdi,%rsi
L$cbc_inp_aligned:
	leaq	-16(%rdx),%rax
	leaq	16(%rdx),%rbx
	shrq	$4,%rcx
	.byte	0xf3,0x0f,0xa7,0xd0
# Store the 16 bytes rax now points at into the start of the context.
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$15,%rdi
	jz	L$cbc_out_aligned
# Misaligned output: rep movsq the result from the stack buffer to it.
	movq	%rbx,%rcx
	shrq	$3,%rcx
	leaq	(%rsp),%rsi
	.byte	0xf3,0x48,0xa5
	subq	%rbx,%rdi
L$cbc_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi
	addq	%rbx,%rsi
	subq	%rbx,%rcx
# mov leaves the flags of the subtraction intact for the jnz below.
	movq	$512,%rbx
	jnz	L$cbc_loop

# No stack buffer was allocated when rsp still equals rbp.
	cmpq	%rsp,%rbp
	je	L$cbc_done

# Wipe the stack buffer from rsp up to rbp, 16 bytes at a time.
	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
L$cbc_bzero:
	movaps	%xmm0,(%rax)
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	L$cbc_bzero

L$cbc_done:
	leaq	(%rbp),%rsp
	jmp	L$cbc_exit
	.p2align	4
L$cbc_short:
# Reserve exactly rcx bytes (non-zero multiple of 16, at most 64).
	movq	%rsp,%rbp
	subq	%rcx,%rsp
	xorq	%rbx,%rbx
L$cbc_short_copy:
	movups	(%rsi,%rbx,1),%xmm0
	leaq	16(%rbx),%rbx
# The compare feeds the ja below; lea and movaps do not alter flags.
	cmpq	%rbx,%rcx
	movaps	%xmm0,-16(%rsp,%rbx,1)
	ja	L$cbc_short_copy
	movq	%rsp,%rsi
	movq	%rcx,%rbx
	jmp	L$cbc_loop
	.p2align	4
L$cbc_aligned:
	leaq	-16(%rdx),%rax
	leaq	16(%rdx),%rbx
	shrq	$4,%rcx
	.byte	0xf3,0x0f,0xa7,0xd0
# Store the 16 bytes rax now points at into the start of the context.
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)
L$cbc_exit:
	movl	$1,%eax
# Drop the flags image pushed before the helper call.
	leaq	8(%rsp),%rsp
L$cbc_abort:
	popq	%rbx
	popq	%rbp
# Two-byte return (rep ret encoding).
	.byte	0xf3,0xc3
# NUL-terminated identification string (61 bytes including the
# terminator), emitted at the current position, i.e. after the code.
	.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>"
	.p2align	4

# One 8-byte slot in the data section, 8-byte aligned and initially
# zero.  _padlock_verify_ctx stores the most recently verified context
# pointer here and compares later pointers against it.
	.data
	.p2align	3
L$padlock_saved_context:
	.quad	0