dnl  Extracted from luatex.git (beta-0.89.2):
dnl    source/libs/gmp/gmp-src/mpn/x86/bdiv_q_1.asm
dnl    blob 132de067dba2cb1d894933a28972b4f8dae6065b

dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.

dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.

dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C     cycles/limb
C P54    30.0
C P55    29.0
C P6     13.0   odd divisor, 12.0 even (strangely)
C K6     14.0
C K7     12.0
C P4     42.0

C Both entry points below are provided by this one source file.
MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)

C Stack argument slots, in the order of the C prototypes given at each
C entry point (dst first, shift last).  PARAM_INVERSE and PARAM_SHIFT are
C only read by mpn_pi1_bdiv_q_1; mpn_bdiv_q_1 computes those two values in
C registers instead.
defframe(PARAM_SHIFT,  24)
defframe(PARAM_INVERSE,20)
defframe(PARAM_DIVISOR,16)
defframe(PARAM_SIZE,   12)
defframe(PARAM_SRC,    8)
defframe(PARAM_DST,    4)

dnl  re-use parameter space
dnl  The inverse is kept in the src argument slot, which is free once the
dnl  src pointer has been loaded into a register.
define(VAR_INVERSE,`PARAM_SRC')

	TEXT

C mp_limb_t
C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C		    mp_limb_t inverse, int shift)
C
C Exact division of the size-limb number at src by a single limb, working
C from the least significant limb upwards, with the quotient written to dst.
C
C The source is shifted right by shift bits on the fly.  Each quotient limb
C is the shifted source limb, less the borrow carried from the previous
C step, multiplied by inverse.  The high half of quotient*divisor then
C becomes the carry limb for the next step.
C
C The mpn_bdiv_q_1 entry point in this file passes the divisor with its low
C zero bits removed, the count of removed bits as shift, and an inverse
C satisfying divisor*inverse == 1 mod 2^GMP_LIMB_BITS.
C
C size is read once and the first source limb is loaded before the count is
C tested, so size is presumably required to be at least 1.
C
C L(common) is also the entry used by mpn_bdiv_q_1, which arrives with
C   eax = inverse, ecx = shift, ebp = size,
C   ebp and ebx already pushed, PARAM_DIVISOR holding the divisor to use.
C
C The stack slots of src and (via mpn_bdiv_q_1) divisor are overwritten.

	ALIGN(16)
PROLOGUE(mpn_pi1_bdiv_q_1)
deflit(`FRAME',0)

	movl	PARAM_SHIFT, %ecx
	pushl	%ebp	FRAME_pushl()

	movl	PARAM_INVERSE, %eax
	movl	PARAM_SIZE, %ebp
	pushl	%ebx	FRAME_pushl()
L(common):
	pushl	%edi	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	movl	PARAM_SRC, %esi
	movl	PARAM_DST, %edi

	C Point at the ends of both operands and index them with a negative
	C counter that runs up to zero.
	leal	(%esi,%ebp,4), %esi	C src end
	leal	(%edi,%ebp,4), %edi	C dst end
	negl	%ebp			C -size

	C The src slot has been read, so it can now hold the inverse.
	movl	%eax, VAR_INVERSE
	movl	(%esi,%ebp,4), %eax	C src[0]

	xorl	%ebx, %ebx		C no carry bit yet
	xorl	%edx, %edx		C no carry limb yet

	incl	%ebp
	jz	L(one)			C size==1, only the high limb step remains

	movl	(%esi,%ebp,4), %edx	C src[1]

	shrdl(	%cl, %edx, %eax)	C low limb of src >> shift

	movl	VAR_INVERSE, %edx
	jmp	L(entry)		C first limb has no carry to apply


	ALIGN(8)
	nop	C k6 code alignment
L(top):
	C eax	q
	C ebx	carry bit, 0 or -1
	C ecx	shift
	C edx	carry limb
	C esi	src end
	C edi	dst end
	C ebp	counter, limbs, negative

	movl	-4(%esi,%ebp,4), %eax
	subl	%ebx, %edx		C accumulate carry bit

	movl	(%esi,%ebp,4), %ebx

	shrdl(	%cl, %ebx, %eax)	C next limb of src >> shift

	subl	%edx, %eax		C apply carry limb
	movl	VAR_INVERSE, %edx

	sbbl	%ebx, %ebx		C borrow of that subtract, as 0 or -1

L(entry):
	imull	%edx, %eax		C q = limb * inverse

	movl	%eax, -4(%edi,%ebp,4)	C store quotient limb
	movl	PARAM_DIVISOR, %edx

	mull	%edx			C edx = high half of q * divisor

	incl	%ebp
	jnz	L(top)


	movl	-4(%esi), %eax		C src high limb
L(one):
	C The high limb has nothing above it to shift in, so a plain shift is
	C used.  esi is no longer needed and is restored here; FRAME is
	C adjusted because VAR_INVERSE is still read from the stack below.
	shrl	%cl, %eax
	popl	%esi	FRAME_popl()

	addl	%ebx, %eax		C apply carry bit

	subl	%edx, %eax		C apply carry limb

	imull	VAR_INVERSE, %eax

	movl	%eax, -4(%edi)		C store high quotient limb

	popl	%edi
	popl	%ebx
	popl	%ebp

	C Return to the caller.  Without this the code would run on into the
	C next function.
	ret

EPILOGUE()

C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                         mp_limb_t divisor);
C
C Front end for the division loop at L(common) in mpn_pi1_bdiv_q_1.  It
C   1. counts and removes the low zero bits of divisor (the shift),
C   2. computes the inverse of the resulting odd divisor modulo 2^32, from
C      an 8-bit table value refined by two Newton steps,
C   3. stores the odd divisor back into PARAM_DIVISOR and jumps to
C      L(common) with eax = inverse, ecx = shift, ebp = size and with ebp
C      and ebx pushed, matching the state mpn_pi1_bdiv_q_1 has there.
C
C NOTE(review): a zero divisor never leaves the L(strip_twos) loop, since
C no set bit is ever shifted out.  Presumably callers guarantee a nonzero
C divisor; confirm against the C level interface.

	ALIGN(16)
PROLOGUE(mpn_bdiv_q_1)
deflit(`FRAME',0)

	movl	PARAM_DIVISOR, %eax
	pushl	%ebp	FRAME_pushl()

	movl	$-1, %ecx		C shift count
	movl	PARAM_SIZE, %ebp

	pushl	%ebx	FRAME_pushl()

	C Shift right until a one bit drops out into the carry flag.  On exit
	C ecx is the number of low zero bits and eax is the odd part of the
	C divisor shifted right once more.
L(strip_twos):
	incl	%ecx

	shrl	%eax
	jnc	L(strip_twos)

	leal	1(%eax,%eax), %ebx	C d without twos
	andl	$127, %eax		C d/2, 7 bits

	C Table lookup of the 8-bit inverse.  The PIC build first obtains
	C the table address in a register, the non-PIC build addresses the
	C table directly.
ifdef(`PIC',`
	LEA(	binvert_limb_table, %edx)
	movzbl	(%eax,%edx), %eax		C inv 8 bits
',`
	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
')

	C First Newton step, 8 bits to 16 bits.
	leal	(%eax,%eax), %edx	C 2*inv
	movl	%ebx, PARAM_DIVISOR	C d without twos
	imull	%eax, %eax		C inv*inv
	imull	%ebx, %eax		C inv*inv*d
	subl	%eax, %edx		C inv = 2*inv - inv*inv*d

	C Second Newton step, 16 bits to 32 bits.
	leal	(%edx,%edx), %eax	C 2*inv
	imull	%edx, %edx		C inv*inv
	imull	%ebx, %edx		C inv*inv*d
	subl	%edx, %eax		C inv = 2*inv - inv*inv*d

	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
	pushl	%eax	FRAME_pushl()
	imull	PARAM_DIVISOR, %eax
	cmpl	$1, %eax
	popl	%eax	FRAME_popl()')

	jmp	L(common)
EPILOGUE()
ASM_END()