new beta-0.90.0
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / pentium / mod_34lsub1.asm
blob2d88223b847014d5f95c106b2427eb7c75afe7c5
dnl  Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
dnl  Copyright 2000-2002 Free Software Foundation, Inc.
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
31 include(`../config.m4')
C P5: 1.66 cycles/limb


C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C Return a value congruent to {src,size} modulo 2^24-1.  The result is only
C congruent, not necessarily fully reduced: a single limb is returned
C unchanged, and longer inputs come back as a sum of partial values.
C
C Since 2^24 == 1 mod 2^24-1, a 32-bit limb at position i has weight
C 2^(8*(i mod 3)).  Limbs are summed into three accumulators (0mod3, 1mod3,
C 2mod3) through one add-with-carry chain, and the accumulators are folded
C together with shifts and masks at the end.
C
C Syntax is AT&T, arguments are read from the stack through the defframe
C offsets below, and the result is returned in eax.  The short path uses
C only eax, ecx and edx; the long path saves and restores ebx, esi, edi
C and ebp.
C
C size==0 is not treated specially: it takes the same path as size==1 and
C reads src[0], so callers presumably must pass size>=1.
C
C Instructions are kept two at a time in their original order, presumably
C for P5 U/V pipe pairing; re-measure before reordering anything.

defframe(PARAM_SIZE, 8)
defframe(PARAM_SRC,  4)

	TEXT
	ALIGN(16)
PROLOGUE(mpn_mod_34lsub1)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	movl	PARAM_SRC, %edx

	subl	$2, %ecx		C above: size>=3, equal: size==2
	ja	L(three_or_more)

	movl	(%edx), %eax		C src[0], movl keeps the subl flags
	jne	L(one)			C size is not 2, return src[0] as is


	C size==2, fold src[0] and src[1] directly

	movl	4(%edx), %ecx		C src[1]
	movl	%eax, %edx

	shrl	$24, %edx		C src[0] high 8 bits, 2^24 == 1
	andl	$0xFFFFFF, %eax		C src[0] low 24 bits

	addl	%edx, %eax
	movl	%ecx, %edx

	shrl	$16, %ecx		C src[1] high 16 bits, 2^48 == 1
	andl	$0xFFFF, %edx

	shll	$8, %edx		C src[1] low 16 bits, 2^32 == 2^8
	addl	%ecx, %eax

	addl	%edx, %eax

L(one):
	ret


L(three_or_more):
	C eax
	C ebx
	C ecx	size-2
	C edx	src
	C esi
	C edi
	C ebp

	pushl	%ebx	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	pushl	%edi	FRAME_pushl()
	pushl	%ebp	FRAME_pushl()

	xorl	%esi, %esi		C 0mod3
	xorl	%edi, %edi		C 1mod3

	xorl	%ebp, %ebp		C 2mod3, and clear carry

L(top):
	C eax	scratch
	C ebx	scratch
	C ecx	counter, limbs
	C edx	src
	C esi	0mod3
	C edi	1mod3
	C ebp	2mod3
	C
	C Three limbs per pass.  The carry out of each adcl feeds the next
	C accumulator, and the carry out of 2mod3 feeds 0mod3 on the next
	C pass (2^96 == 1).  Only movl, leal and decl sit between the adcl
	C instructions, none of which change the carry flag.

	movl	(%edx), %eax
	movl	4(%edx), %ebx

	adcl	%eax, %esi
	movl	8(%edx), %eax

	adcl	%ebx, %edi
	leal	12(%edx), %edx		C src += 3 limbs

	adcl	%eax, %ebp
	leal	-2(%ecx), %ecx		C with the decl below, counter -= 3

	decl	%ecx
	jg	L(top)


	C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively
	C
	C ebx tracks where the pending carry belongs.  Each mask is the
	C negative of the carry weight: -1, -2^8 or -2^16.

	movl	$0xFFFFFFFF, %ebx	C mask
	incl	%ecx

	js	L(combine)		C 0 more

	movl	(%edx), %eax
	movl	$0xFFFFFF00, %ebx

	adcl	%eax, %esi
	decl	%ecx

	js	L(combine)		C 1 more

	movl	4(%edx), %eax
	movl	$0xFFFF0000, %ebx

	adcl	%eax, %edi



L(combine):
	C eax
	C ebx	mask
	C ecx
	C edx
	C esi	0mod3
	C edi	1mod3
	C ebp	2mod3
	C
	C The carry flag still holds the carry out of the last adcl.  sbbl
	C turns it into 0 or -1, the mask scales it, and the subl then adds
	C the carry weight to the result.

	sbbl	%ecx, %ecx		C carry
	movl	%esi, %eax		C 0mod3

	andl	%ebx, %ecx		C masked for position
	andl	$0xFFFFFF, %eax		C 0mod3 low

	shrl	$24, %esi		C 0mod3 high
	subl	%ecx, %eax		C apply carry

	addl	%esi, %eax		C apply 0mod3
	movl	%edi, %ebx		C 1mod3

	shrl	$16, %edi		C 1mod3 high
	andl	$0x0000FFFF, %ebx

	shll	$8, %ebx		C 1mod3 low
	addl	%edi, %eax		C apply 1mod3 high

	addl	%ebx, %eax		C apply 1mod3 low
	movl	%ebp, %ebx		C 2mod3

	shrl	$8, %ebp		C 2mod3 high
	andl	$0xFF, %ebx

	shll	$16, %ebx		C 2mod3 low
	addl	%ebp, %eax		C apply 2mod3 high

	addl	%ebx, %eax		C apply 2mod3 low

	C restore in reverse order of the pushes above

	popl	%ebp
	popl	%edi

	popl	%esi
	popl	%ebx

	ret				C result in eax

EPILOGUE()