Remove remaining OSI protocol support.
[dragonfly.git] / secure / lib / libcrypto / asm / x86-mont.s
blobcfa1ea2423b1007f5694c9f2965e775e022e033d
#-----------------------------------------------------------------------
# bn_mul_mont -- Montgomery multiplication for 32-bit x86.
# Syntax: GNU as, AT&T operand order (op src,dst).
#
# The ident string at the bottom of this file decodes to
#   "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
#
# ABI:    six 32-bit arguments on the stack (i386 cdecl style).  After the
#         four register pushes they are found at 20(%esp)..40(%esp).
#         The names used in the comments below (rp, ap, bp, np, n0, num)
#         follow the OpenSSL C prototype, which is not visible in this
#         file -- NOTE(review): confirm against the C declaration.
# Out:    %eax = 0 if num < 4 (nothing is computed), otherwise 1.
# Saves:  %ebp, %ebx, %esi, %edi are pushed on entry and popped on exit.
# Clobb:  %eax, %ecx, %edx, flags; %mm0-%mm7 on the SSE2 path (emms is
#         executed before leaving that path).
# Extern: reads bit 26 of OPENSSL_ia32cap_P to choose the SSE2 path.
#
# Stack frame built below (offsets from the re-aligned %esp):
#    0(%esp)  loop bound used by the squaring path (num-1)
#    4(%esp)  argument 0 (rp)
#    8(%esp)  argument 1 (ap)
#   12(%esp)  argument 2 (bp); later reused as the outer-loop cursor
#   16(%esp)  argument 3 (np)
#   20(%esp)  word loaded through argument 4 (*n0)
#   24(%esp)  caller-side %esp, restored before returning
#   28(%esp)  end pointer bp+4*num (integer multiply path only)
#   32(%esp)  tp[0..num+1], the temporary accumulator
#-----------------------------------------------------------------------
	.file	"../../../../crypto/openssl/crypto/bn/asm/x86-mont.s"
	.text
	.globl	bn_mul_mont
	.type	bn_mul_mont,@function
	.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			# return value 0 for the early exit
	movl	40(%esp),%edi			# %edi = num
	cmpl	$4,%edi
	jl	.L000just_leave			# num < 4: leave without doing anything

	# ---- carve out the frame: 32 bytes + 4*(num+2) bytes below %esp ----
	leal	20(%esp),%esi			# %esi -> argument block
	leal	24(%esp),%edx			# %edx = address of the 2nd argument slot
	movl	%esp,%ebp			# remember %esp for the epilogue
	addl	$2,%edi				# num+2 words for tp
	negl	%edi
	leal	-32(%esp,%edi,4),%esp		# %esp -= 32 + 4*(num+2)
	negl	%edi				# %edi = num+2 again
	# Adjust %esp relative to %edx: first make them equal modulo 2048,
	# then make bit 11 of the two differ, then round down to 64 bytes.
	# (Presumably a cache-aliasing optimisation -- the code itself does
	# not say why.)
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp

	# ---- copy the arguments into the new frame ----
	movl	(%esi),%eax			# rp
	movl	4(%esi),%ebx			# ap
	movl	8(%esi),%ecx			# bp
	movl	12(%esi),%edx			# np
	movl	16(%esi),%esi			# pointer n0 ...
	movl	(%esi),%esi			# ... dereferenced: the n0 word itself
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			# %ebx = (num+2)-3 = num-1
	movl	%ebp,24(%esp)			# saved caller-side %esp

	# ---- pick the implementation: bit 26 of the capability word ----
	leal	OPENSSL_ia32cap_P,%eax
	btl	$26,(%eax)
	jnc	.L001non_sse2			# bit clear: integer-only code

	#---------------------------------------------------------------
	# SSE2 path (pmuludq on MMX registers).
	#   %esi = ap, %edi = bp, %ebp = np, %edx = outer index i,
	#   %ecx = inner index j, %ebx = num-1
	#   %mm7 = 0x00000000ffffffff mask, %mm4 = bp[i], %mm5 = multiplier
	#   applied to np[], %mm2 / %mm3 = the two running 64-bit sums
	#---------------------------------------------------------------
	movl	$-1,%eax
	movd	%eax,%mm7			# low-dword mask
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx			# i = 0
	xorl	%ecx,%ecx			# j = 0
	movd	(%edi),%mm4			# bp[0]
	movd	(%esi),%mm5			# ap[0]
	movd	(%ebp),%mm3			# np[0]
	pmuludq	%mm4,%mm5			# ap[0]*bp[0]
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0			# low word of the product
	pmuludq	20(%esp),%mm5			# low word * n0 -> multiplier for np[]
	pmuludq	%mm5,%mm3			# np[0]*multiplier
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1			# np[1]
	movd	4(%esi),%mm0			# ap[1]
	psrlq	$32,%mm2			# keep only the carries
	psrlq	$32,%mm3
	incl	%ecx				# j = 1
	.align	16
.L0021st:					# first pass: tp[] is written, not read
	pmuludq	%mm4,%mm0			# ap[j]*bp[0]
	pmuludq	%mm5,%mm1			# np[j]*multiplier
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1		# preload np[j+1]
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0		# preload ap[j+1]
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)		# tp[j-1] = low word
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0021st
	# last word of the first pass (j = num-1)
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)		# tp[num-2]
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)		# 64-bit store: tp[num-1] and tp[num]
	incl	%edx				# i = 1
.L003outer:					# one iteration per remaining bp[i]
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4		# bp[i]
	movd	(%esi),%mm5			# ap[0]
	movd	32(%esp),%mm6			# tp[0]
	movd	(%ebp),%mm3			# np[0]
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5			# ap[0]*bp[i] + tp[0]
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5			# low word * n0 -> multiplier for np[]
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6			# tp[1]
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx				# j = 1
	decl	%ebx				# %ebx becomes the inner down-counter
.L004inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6		# tp[j+1]
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)		# tp[j-1]
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx				# sets ZF for the jnz below ...
	leal	1(%ecx),%ecx			# ... lea leaves the flags alone
	jnz	.L004inner
	movl	%ecx,%ebx			# %ebx = num-1 again
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6		# tp[num]
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)		# tp[num-1] and tp[num]
	leal	1(%edx),%edx			# i++
	cmpl	%ebx,%edx
	jle	.L003outer			# while i <= num-1
	emms					# leave MMX state clean
	jmp	.L005common_tail

	#---------------------------------------------------------------
	# Integer-only path.  Chooses between the general multiply and
	# the squaring code: the squaring code is entered only when
	# (num & 1) == 0 and ap == bp.
	#---------------------------------------------------------------
	.align	16
.L001non_sse2:
	movl	8(%esp),%esi			# ap
	leal	1(%ebx),%ebp			# num
	movl	12(%esp),%edi			# bp
	xorl	%ecx,%ecx			# j = 0
	movl	%esi,%edx
	andl	$1,%ebp				# num & 1
	subl	%edi,%edx			# ap - bp
	leal	4(%edi,%ebx,4),%eax		# %eax = bp + 4*num (end of bp[])
	orl	%edx,%ebp			# zero only if num even and ap == bp
	movl	(%edi),%edi			# bp[0]; mov does not disturb ZF
	jz	.L006bn_sqr_mont

	# ---- general multiply: tp = ap[] * bp[0] ----
	movl	%eax,28(%esp)			# remember the end of bp[]
	movl	(%esi),%eax			# ap[0]
	xorl	%edx,%edx
	.align	16
.L007mull:
	movl	%edx,%ebp			# carry word from the previous product
	mull	%edi				# %edx:%eax = ap[j]*bp[0]
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax		# next ap[j]
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)		# tp[j-1]
	jl	.L007mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi			# n0
	addl	%ebp,%eax
	movl	16(%esp),%esi			# np
	adcl	$0,%edx
	imull	32(%esp),%edi			# %edi = tp[0]*n0 (multiplier for np[])
	movl	%eax,32(%esp,%ebx,4)		# tp[num-1]
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)		# tp[num]
	movl	%ecx,40(%esp,%ebx,4)		# tp[num+1] = 0
	movl	(%esi),%eax			# np[0]
	mull	%edi
	addl	32(%esp),%eax			# done only for the carry; %eax is reloaded
	movl	4(%esi),%eax			# np[1]
	adcl	$0,%edx
	incl	%ecx				# j = 1
	jmp	.L0082ndmadd
	.align	16
.L0091stmadd:					# tp[] += ap[] * bp[i]
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0091stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi			# n0
	adcl	$0,%edx
	movl	16(%esp),%esi			# np
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi			# %edi = tp[0]*n0
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)		# tp[num-1]
	adcl	$0,%ecx
	movl	(%esi),%eax			# np[0]
	movl	%edx,36(%esp,%ebx,4)		# tp[num]
	movl	%ecx,40(%esp,%ebx,4)		# tp[num+1] = carry out
	mull	%edi
	addl	32(%esp),%eax			# done only for the carry; %eax is reloaded
	movl	4(%esi),%eax			# np[1]
	adcl	$0,%edx
	movl	$1,%ecx
	.align	16
.L0082ndmadd:					# tp[] = (tp[] + np[] * %edi), shifted down one word
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)		# stored one word lower than it was read
	jl	.L0082ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		# tp[num-2]
	xorl	%eax,%eax
	movl	12(%esp),%ecx			# current position in bp[]
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx			# advance to the next bp word
	movl	%edx,32(%esp,%ebx,4)		# tp[num-1]
	cmpl	28(%esp),%ecx			# reached bp + 4*num?
	movl	%eax,36(%esp,%ebx,4)		# tp[num]
	je	.L005common_tail
	movl	(%ecx),%edi			# next bp[i]
	movl	8(%esp),%esi			# ap
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax			# ap[0]
	jmp	.L0091stmadd

	#---------------------------------------------------------------
	# Squaring path (entered with ap == bp and num even).
	#   (%esp)   = num-1, the loop bound
	#   12(%esp) = index i of the word currently being squared
	# Cross products are doubled with lea (..,2); the bit shifted out
	# of each word is carried forward in %ebx via shr $31.
	#---------------------------------------------------------------
	.align	16
.L006bn_sqr_mont:
	movl	%ebx,(%esp)			# num-1
	movl	%ecx,12(%esp)			# i = 0 (%ecx was cleared above)
	movl	%edi,%eax			# a[0]
	mull	%edi				# a[0]*a[0]
	movl	%eax,32(%esp)			# tp[0]
	movl	%edx,%ebx
	shrl	$1,%edx				# high word halved; it is doubled again below
	andl	$1,%ebx				# ... and its low bit is kept in %ebx
	incl	%ecx				# j = 1
	.align	16
.L010sqr:
	movl	(%esi,%ecx,4),%eax		# a[j]
	movl	%edx,%ebp
	mull	%edi				# a[j]*a[0]
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp		# 2*word + bit carried from below
	shrl	$31,%eax			# bit shifted out by the doubling
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)		# tp[j-1]
	jl	.L010sqr
	movl	(%esi,%ecx,4),%eax		# a[num-1]
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi			# n0
	adcl	$0,%edx
	movl	16(%esp),%esi			# np
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi			# %edi = tp[0]*n0
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)		# tp[num-1]
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax			# np[0]
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)		# tp[num]
	movl	%edx,40(%esp,%ecx,4)		# tp[num+1]
	mull	%edi
	addl	32(%esp),%eax			# done only for the carry; %eax is reloaded
	movl	%ecx,%ebx			# %ebx = num-1
	adcl	$0,%edx
	movl	4(%esi),%eax			# np[1]
	movl	$1,%ecx
	.align	16
.L0113rdmadd:					# tp[] += np[] * %edi, two words per iteration
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0113rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		# tp[num-2]
	movl	12(%esp),%ecx			# i
	xorl	%eax,%eax
	movl	8(%esp),%esi			# ap
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)		# tp[num-1]
	cmpl	%ebx,%ecx			# i == num-1 ?
	movl	%eax,36(%esp,%ebx,4)		# tp[num]
	je	.L005common_tail
	movl	4(%esi,%ecx,4),%edi		# a[i+1]
	leal	1(%ecx),%ecx			# i++
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi				# a[i]*a[i]
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)		# tp[i]
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx			# flags consumed by the je below ...
	leal	1(%ecx),%ecx			# ... lea leaves them alone
	je	.L012sqrlast			# i was the last word: no cross products
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	.align	16
.L013sqradd:					# tp[] += 2 * a[j]*a[i] for j > i
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp		# doubled low word
	adcl	$0,%edx
	shrl	$31,%eax			# bit shifted out by the doubling
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L013sqradd
	movl	%edx,%ebp
	leal	(%ebx,%edx,2),%edx
	shrl	$31,%ebp
.L012sqrlast:
	movl	20(%esp),%edi			# n0
	movl	16(%esp),%esi			# np
	imull	32(%esp),%edi			# %edi = tp[0]*n0
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax			# np[0]
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax			# done only for the carry; %eax is reloaded
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax			# np[1]
	jmp	.L0113rdmadd

	#---------------------------------------------------------------
	# Common tail: rp[] = tp[] - np[], then, depending on the final
	# borrow, either keep that difference or copy tp[] over it.  The
	# choice is made by masking pointers, not by branching.
	# On entry %ebx = num-1.
	#---------------------------------------------------------------
	.align	16
.L005common_tail:
	movl	16(%esp),%ebp			# np
	movl	4(%esp),%edi			# rp
	leal	32(%esp),%esi			# tp
	movl	(%esi),%eax			# tp[0]
	movl	%ebx,%ecx			# counter = num-1
	xorl	%edx,%edx			# j = 0, and CF = 0 for the first sbb
	.align	16
.L014sub:
	sbbl	(%ebp,%edx,4),%eax		# tp[j] - np[j] - borrow
	movl	%eax,(%edi,%edx,4)		# rp[j]
	decl	%ecx				# dec/mov/lea all leave CF untouched
	movl	4(%esi,%edx,4),%eax		# tp[j+1]
	leal	1(%edx),%edx
	jge	.L014sub			# flags from decl: loop while counter >= 0
	sbbl	$0,%eax				# top word minus the final borrow
	andl	%eax,%esi			# tp & mask
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp			# rp & ~mask
	orl	%ebp,%esi			# %esi = selected source pointer
	.align	16
.L015copy:
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)		# rp[i] = source[i]
	movl	%ecx,32(%esp,%ebx,4)		# overwrite tp[i] with %ecx (scrub the scratch area)
	decl	%ebx
	jge	.L015copy
	movl	24(%esp),%esp			# back to the caller-side stack pointer
	movl	$1,%eax				# return 1
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret					# was missing: without it control ran into the data below
	.size	bn_mul_mont,.-.L_bn_mul_mont_begin
	# "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>\0"
	.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
	.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
	.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
	.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
	.byte	111,114,103,62,0
	.comm	OPENSSL_ia32cap_P,4,4