Import LibreSSL v2.4.2 to vendor branch
[dragonfly.git] / crypto / libressl / crypto / bn / gf2m-macosx-x86_64.s
blob e7d0ec9fe2287e45f15b2504d0b46ed5799dcd62
# x86-64 (Mach-O/macOS) AT&T-syntax assembly: GF(2^m) multiplication for the
# LibreSSL bignum library (CRYPTOGAMS-generated).  NOTE(review): the leading
# decimal on every line below is a line-number artifact from the gitweb
# scrape of this blob, not assembler input.
1 .text

#-----------------------------------------------------------------------------
# _mul_1x1 -- carry-less ("binary polynomial", GF(2)[x]) 64x64 -> 128-bit
# multiplication.  Internal helper, custom (non-ABI) register contract:
#   In:   %rax = a, %rbp = b, %r8 = 15 (nibble mask, preloaded by the caller)
#   Out:  %rdx:%rax = a (x) b   (high:low halves of the 128-bit product)
#   Clobbers: %rbx, %rcx, %rsi, %rdi, %r9-%r14, %xmm0, %xmm1, flags,
#             136 bytes of stack below entry %rsp
# Strategy: a is split into its low 61 bits a1 (table-driven part) and its
# top 3 bits (folded in via sign-broadcast masks, so the 16-entry table of
# i*a1 never overflows 64 bits); b is then consumed four bits at a time.
#-----------------------------------------------------------------------------
4 .p2align 4
5 _mul_1x1:
6 subq $128+8,%rsp              # 128-byte lookup table + 8 alignment bytes
7 movq $-1,%r9
8 leaq (%rax,%rax,1),%rsi       # %rsi = a<<1 (moves bit 62 into the sign bit)
9 shrq $3,%r9                   # %r9 = 2^61 - 1
10 leaq (,%rax,4),%rdi           # %rdi = a<<2 (moves bit 61 into the sign bit)
11 andq %rax,%r9                 # %r9 = a1 = a mod x^61
12 leaq (,%rax,8),%r12           # %r12 = a<<3 == 8*a1 (top 3 bits shifted out)
13 sarq $63,%rax                 # broadcast bit 63 of a to an all-ones/0 mask
14 leaq (%r9,%r9,1),%r10         # %r10 = 2*a1
15 sarq $63,%rsi                 # mask for bit 62 of a
16 leaq (,%r9,4),%r11            # %r11 = 4*a1
17 andq %rbp,%rax                # %rax = bit63(a) ? b : 0
18 sarq $63,%rdi                 # mask for bit 61 of a
19 movq %rax,%rdx
20 shlq $63,%rax                 # bit-63 term: low half = (mask&b)<<63 ...
21 andq %rbp,%rsi                # %rsi = bit62(a) ? b : 0
22 shrq $1,%rdx                  # ... high half = (mask&b)>>1
23 movq %rsi,%rcx
24 shlq $62,%rsi                 # bit-62 term, low half
25 andq %rbp,%rdi                # %rdi = bit61(a) ? b : 0
26 shrq $2,%rcx                  # bit-62 term, high half
27 xorq %rsi,%rax                # fold bit-62 term into low
28 movq %rdi,%rbx
29 shlq $61,%rdi                 # bit-61 term, low half
30 xorq %rcx,%rdx                # fold bit-62 term into high
31 shrq $3,%rbx                  # bit-61 term, high half
32 xorq %rdi,%rax                # fold bit-61 term into low
33 xorq %rbx,%rdx                # fold bit-61 term into high

# Build table[i] = i (x) a1 (carry-less multiples, i = 0..15) at (%rsp),
# interleaving the XOR chain with the stores.
35 movq %r9,%r13
36 movq $0,0(%rsp)               # table[0] = 0
37 xorq %r10,%r13                # %r13 = 3*a1
38 movq %r9,8(%rsp)              # table[1] = a1
39 movq %r11,%r14
40 movq %r10,16(%rsp)            # table[2] = 2*a1
41 xorq %r12,%r14                # %r14 = 12*a1
42 movq %r13,24(%rsp)            # table[3] = 3*a1
44 xorq %r11,%r9                 # %r9 = 5*a1
45 movq %r11,32(%rsp)            # table[4] = 4*a1
46 xorq %r11,%r10                # %r10 = 6*a1
47 movq %r9,40(%rsp)             # table[5] = 5*a1
48 xorq %r11,%r13                # %r13 = 7*a1
49 movq %r10,48(%rsp)            # table[6] = 6*a1
50 xorq %r14,%r9                 # %r9 = 9*a1
51 movq %r13,56(%rsp)            # table[7] = 7*a1
52 xorq %r14,%r10                # %r10 = 10*a1
54 movq %r12,64(%rsp)            # table[8] = 8*a1
55 xorq %r14,%r13                # %r13 = 11*a1
56 movq %r9,72(%rsp)             # table[9] = 9*a1
57 xorq %r11,%r9                 # %r9 = 13*a1
58 movq %r10,80(%rsp)            # table[10] = 10*a1
59 xorq %r11,%r10                # %r10 = 14*a1
60 movq %r13,88(%rsp)            # table[11] = 11*a1
62 xorq %r11,%r13                # %r13 = 15*a1
63 movq %r14,96(%rsp)            # table[12] = 12*a1
64 movq %r8,%rsi                 # %r8 = 15 (nibble mask)
65 movq %r9,104(%rsp)            # table[13] = 13*a1
66 andq %rbp,%rsi                # %rsi = nibble 0 of b
67 movq %r10,112(%rsp)           # table[14] = 14*a1
68 shrq $4,%rbp
69 movq %r13,120(%rsp)           # table[15] = 15*a1
70 movq %r8,%rdi
71 andq %rbp,%rdi                # %rdi = nibble 1 of b
72 shrq $4,%rbp

# Lookup phase: the 16 nibbles of b select table entries.  Even nibbles sit
# at byte-aligned offsets and accumulate in %xmm0 via pslldq; odd nibbles
# accumulate in %rdx:%rax via a shlq/shrq pair splitting the entry across
# the 128-bit boundary.  Rounds are software-pipelined: each one folds
# nibbles 2k-1 and 2k while extracting nibbles 2k+1 and 2k+2 from b.
74 movq (%rsp,%rsi,8),%xmm0      # table[nibble 0] -> low lane of %xmm0
75 movq %r8,%rsi
76 andq %rbp,%rsi                # nibble 2
77 shrq $4,%rbp
# fold nibble 1 (bit offset 4) and nibble 2 (bit offset 8)
78 movq (%rsp,%rdi,8),%rcx       # table[nibble 1]
79 movq %r8,%rdi
80 movq %rcx,%rbx
81 shlq $4,%rcx
82 andq %rbp,%rdi                # nibble 3
83 movq (%rsp,%rsi,8),%xmm1      # table[nibble 2]
84 shrq $60,%rbx
85 xorq %rcx,%rax
86 pslldq $1,%xmm1               # << 8 bits
87 movq %r8,%rsi
88 shrq $4,%rbp
89 xorq %rbx,%rdx
90 andq %rbp,%rsi                # nibble 4
91 shrq $4,%rbp
92 pxor %xmm1,%xmm0
# fold nibble 3 (bit offset 12) and nibble 4 (bit offset 16)
93 movq (%rsp,%rdi,8),%rcx
94 movq %r8,%rdi
95 movq %rcx,%rbx
96 shlq $12,%rcx
97 andq %rbp,%rdi                # nibble 5
98 movq (%rsp,%rsi,8),%xmm1
99 shrq $52,%rbx
100 xorq %rcx,%rax
101 pslldq $2,%xmm1               # << 16 bits
102 movq %r8,%rsi
103 shrq $4,%rbp
104 xorq %rbx,%rdx
105 andq %rbp,%rsi                # nibble 6
106 shrq $4,%rbp
107 pxor %xmm1,%xmm0
# fold nibble 5 (bit offset 20) and nibble 6 (bit offset 24)
108 movq (%rsp,%rdi,8),%rcx
109 movq %r8,%rdi
110 movq %rcx,%rbx
111 shlq $20,%rcx
112 andq %rbp,%rdi                # nibble 7
113 movq (%rsp,%rsi,8),%xmm1
114 shrq $44,%rbx
115 xorq %rcx,%rax
116 pslldq $3,%xmm1               # << 24 bits
117 movq %r8,%rsi
118 shrq $4,%rbp
119 xorq %rbx,%rdx
120 andq %rbp,%rsi                # nibble 8
121 shrq $4,%rbp
122 pxor %xmm1,%xmm0
# fold nibble 7 (bit offset 28) and nibble 8 (bit offset 32)
123 movq (%rsp,%rdi,8),%rcx
124 movq %r8,%rdi
125 movq %rcx,%rbx
126 shlq $28,%rcx
127 andq %rbp,%rdi                # nibble 9
128 movq (%rsp,%rsi,8),%xmm1
129 shrq $36,%rbx
130 xorq %rcx,%rax
131 pslldq $4,%xmm1               # << 32 bits
132 movq %r8,%rsi
133 shrq $4,%rbp
134 xorq %rbx,%rdx
135 andq %rbp,%rsi                # nibble 10
136 shrq $4,%rbp
137 pxor %xmm1,%xmm0
# fold nibble 9 (bit offset 36) and nibble 10 (bit offset 40)
138 movq (%rsp,%rdi,8),%rcx
139 movq %r8,%rdi
140 movq %rcx,%rbx
141 shlq $36,%rcx
142 andq %rbp,%rdi                # nibble 11
143 movq (%rsp,%rsi,8),%xmm1
144 shrq $28,%rbx
145 xorq %rcx,%rax
146 pslldq $5,%xmm1               # << 40 bits
147 movq %r8,%rsi
148 shrq $4,%rbp
149 xorq %rbx,%rdx
150 andq %rbp,%rsi                # nibble 12
151 shrq $4,%rbp
152 pxor %xmm1,%xmm0
# fold nibble 11 (bit offset 44) and nibble 12 (bit offset 48)
153 movq (%rsp,%rdi,8),%rcx
154 movq %r8,%rdi
155 movq %rcx,%rbx
156 shlq $44,%rcx
157 andq %rbp,%rdi                # nibble 13
158 movq (%rsp,%rsi,8),%xmm1
159 shrq $20,%rbx
160 xorq %rcx,%rax
161 pslldq $6,%xmm1               # << 48 bits
162 movq %r8,%rsi
163 shrq $4,%rbp
164 xorq %rbx,%rdx
165 andq %rbp,%rsi                # nibble 14
166 shrq $4,%rbp
167 pxor %xmm1,%xmm0
# fold nibble 13 (bit offset 52) and nibble 14 (bit offset 56)
168 movq (%rsp,%rdi,8),%rcx
169 movq %r8,%rdi
170 movq %rcx,%rbx
171 shlq $52,%rcx
172 andq %rbp,%rdi                # nibble 15 (last)
173 movq (%rsp,%rsi,8),%xmm1
174 shrq $12,%rbx
175 xorq %rcx,%rax
176 pslldq $7,%xmm1               # << 56 bits
177 movq %r8,%rsi
178 shrq $4,%rbp
179 xorq %rbx,%rdx
180 andq %rbp,%rsi
181 shrq $4,%rbp
182 pxor %xmm1,%xmm0
# fold nibble 15 (bit offset 60), then merge the SSE accumulator
183 movq (%rsp,%rdi,8),%rcx       # table[nibble 15]
184 movq %rcx,%rbx
185 shlq $60,%rcx
186 movd %xmm0,%rsi               # low 64 bits of even-nibble accumulator
187 shrq $4,%rbx
188 xorq %rcx,%rax
189 psrldq $8,%xmm0
190 xorq %rbx,%rdx
191 movd %xmm0,%rdi               # high 64 bits of even-nibble accumulator
192 xorq %rsi,%rax                # %rdx:%rax = a (x) b
193 xorq %rdi,%rdx
195 addq $128+8,%rsp
196 .byte 0xf3,0xc3               # rep ret (return-predictor-friendly ret)
197 L$end_mul_1x1:
#-----------------------------------------------------------------------------
# void bn_GF2m_mul_2x2(BN_ULONG r[4], BN_ULONG a1, BN_ULONG a0,
#                      BN_ULONG b1, BN_ULONG b0)
# SysV AMD64: %rdi = r, %rsi = a1, %rdx = a0, %rcx = b1, %r8 = b0.
# Computes the 256-bit carry-less product (a1*x^64 + a0) (x) (b1*x^64 + b0)
# and stores it little-endian in r[0..3].  Karatsuba with three 64x64
# multiplies -- a1b1, a0b0 and (a1^a0)(b1^b0) -- since in GF(2) the middle
# term is just the XOR of all three.
#-----------------------------------------------------------------------------
200 .globl _bn_GF2m_mul_2x2
202 .p2align 4
203 _bn_GF2m_mul_2x2:
204 movq _OPENSSL_ia32cap_P(%rip),%rax
205 btq $33,%rax                 # capability bit 33 = PCLMULQDQ support
206 jnc L$vanilla_mul_2x2
# --- PCLMULQDQ path.  The .byte runs encode pclmulqdq for old assemblers:
# --- 102,15,58,68 = 66 0F 3A 44, then ModRM, then imm8 0 (low*low qwords).
208 movd %rsi,%xmm0              # %xmm0 = a1
209 movd %rcx,%xmm1              # %xmm1 = b1
210 movd %rdx,%xmm2              # %xmm2 = a0
211 movd %r8,%xmm3               # %xmm3 = b0
212 movdqa %xmm0,%xmm4
213 movdqa %xmm1,%xmm5
214 .byte 102,15,58,68,193,0     # pclmulqdq $0,%xmm1,%xmm0: %xmm0 = a1 (x) b1
215 pxor %xmm2,%xmm4             # %xmm4 = a1^a0
216 pxor %xmm3,%xmm5             # %xmm5 = b1^b0
217 .byte 102,15,58,68,211,0     # pclmulqdq $0,%xmm3,%xmm2: %xmm2 = a0 (x) b0
218 .byte 102,15,58,68,229,0     # pclmulqdq $0,%xmm5,%xmm4: (a1^a0)(x)(b1^b0)
219 xorps %xmm0,%xmm4            # Karatsuba middle term:
220 xorps %xmm2,%xmm4            # %xmm4 = mid ^ a1b1 ^ a0b0
221 movdqa %xmm4,%xmm5
222 pslldq $8,%xmm4              # low half of middle -> upper lane
223 psrldq $8,%xmm5              # high half of middle -> lower lane
224 pxor %xmm4,%xmm2             # low 128 bits of the result
225 pxor %xmm5,%xmm0             # high 128 bits of the result
226 movdqu %xmm2,0(%rdi)         # r[] may be unaligned, hence movdqu
227 movdqu %xmm0,16(%rdi)
228 .byte 0xf3,0xc3              # rep ret
# --- Scalar fallback: same Karatsuba via three _mul_1x1 calls. ---
230 .p2align 4
231 L$vanilla_mul_2x2:
232 leaq -136(%rsp),%rsp         # frame: products at 0-24, saved args at
233 movq %r14,80(%rsp)           # 32-64, callee-saved regs at 80-112;
234 movq %r13,88(%rsp)           # 136 keeps %rsp 16-aligned at the calls
235 movq %r12,96(%rsp)
236 movq %rbp,104(%rsp)
237 movq %rbx,112(%rsp)
238 L$body_mul_2x2:
239 movq %rdi,32(%rsp)           # save r
240 movq %rsi,40(%rsp)           # save a1
241 movq %rdx,48(%rsp)           # save a0
242 movq %rcx,56(%rsp)           # save b1
243 movq %r8,64(%rsp)            # save b0
245 movq $15,%r8                 # nibble mask consumed by _mul_1x1
246 movq %rsi,%rax
247 movq %rcx,%rbp
248 call _mul_1x1                # a1 (x) b1
249 movq %rax,16(%rsp)
250 movq %rdx,24(%rsp)
252 movq 48(%rsp),%rax
253 movq 64(%rsp),%rbp
254 call _mul_1x1                # a0 (x) b0
255 movq %rax,0(%rsp)
256 movq %rdx,8(%rsp)
258 movq 40(%rsp),%rax
259 movq 56(%rsp),%rbp
260 xorq 48(%rsp),%rax           # a1^a0
261 xorq 64(%rsp),%rbp           # b1^b0
262 call _mul_1x1                # (a1^a0) (x) (b1^b0) left in %rdx:%rax
263 movq 0(%rsp),%rbx            # lo(a0b0)
264 movq 8(%rsp),%rcx            # hi(a0b0)
265 movq 16(%rsp),%rdi           # lo(a1b1)
266 movq 24(%rsp),%rsi           # hi(a1b1)
267 movq 32(%rsp),%rbp           # r
# Recombine (mid = a1b1 ^ a0b0 ^ the product just computed):
#   r[0] = lo(a0b0)              r[1] = hi(a0b0) ^ lo(mid)
#   r[2] = lo(a1b1) ^ hi(mid)    r[3] = hi(a1b1)
269 xorq %rdx,%rax
270 xorq %rcx,%rdx
271 xorq %rbx,%rax
272 movq %rbx,0(%rbp)            # r[0]
273 xorq %rdi,%rdx
274 movq %rsi,24(%rbp)           # r[3]
275 xorq %rsi,%rax
276 xorq %rsi,%rdx
277 xorq %rdx,%rax
278 movq %rdx,16(%rbp)           # r[2]
279 movq %rax,8(%rbp)            # r[1]
281 movq 80(%rsp),%r14           # restore callee-saved registers
282 movq 88(%rsp),%r13
283 movq 96(%rsp),%r12
284 movq 104(%rsp),%rbp
285 movq 112(%rsp),%rbx
286 leaq 136(%rsp),%rsp
287 .byte 0xf3,0xc3              # rep ret
288 L$end_mul_2x2:
# ASCII attribution string (NUL-terminated):
# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
290 .byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
291 .p2align 4