beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / core2 / divrem_1.asm
blob1b3f1394ec5cd540c430e9ae85c42390b974c4ee
1 dnl x86-64 mpn_divrem_1 -- mpn by limb division.
3 dnl Copyright 2004, 2005, 2007-2010, 2012, 2014 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C norm unorm frac
35 C AMD K8,K9 15 15 12
36 C AMD K10 15 15 12
37 C Intel P4 44 44 43
38 C Intel core2 24 24 19.5
39 C Intel corei 19 19 18
40 C Intel atom 51 51 36
41 C VIA nano 46 44 22.5
43 C mp_limb_t
44 C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
45 C mp_srcptr np, mp_size_t nn, mp_limb_t d)
47 C mp_limb_t
48 C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
49 C mp_srcptr np, mp_size_t nn, mp_limb_t d,
50 C mp_limb_t dinv, int cnt)
52 C INPUT PARAMETERS
C The first six names are the argument registers in prototype order
C (qp, fn, np, nn, d, dinv).  The last four are the names used by the
C function bodies.  cnt shares rcx with un_param and up shares rsi with
C fn_param, so both prologues copy un_param and fn_param into un and fn
C before rcx and rsi are reused.
53 define(`qp', `%rdi')	C quotient store pointer
54 define(`fn_param', `%rsi')	C fn as passed in
55 define(`up_param', `%rdx')	C source pointer as passed in, np in the prototypes
56 define(`un_param', `%rcx')	C source limb count as passed in, nn in the prototypes
57 define(`d', `%r8')	C divisor limb
58 define(`dinv', `%r9') C only for mpn_preinv_divrem_1
59 C shift passed on stack C only for mpn_preinv_divrem_1
61 define(`cnt', `%rcx')	C shift count, used through its low byte cl
62 define(`up', `%rsi')	C source pointer inside the body
63 define(`fn', `%r12')	C count of extra quotient limbs still to produce
64 define(`un', `%rbx')	C count or index of source limbs still to process
67 C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
68 C rcx = cnt, rdi = qp, r8 = d, r9 = dinv
C Both ABIs are declared as supported.  IFSTD and IFDOS select text for one
C ABI only.
70 ABI_SUPPORT(DOS64)
71 ABI_SUPPORT(STD64)
C CNTOFF(reg) is the displacement from reg of the stack-passed shift count as
C read by mpn_preinv_divrem_1 after its four pushes.  For STD64 the value 40
C is the 8-byte return address plus 4*8 bytes of pushed registers.  The DOS64
C value 104 presumably also accounts for what FUNC_ENTRY pushes and for the
C stack slots of the earlier arguments - confirm against the macro definitions.
73 IFSTD(`define(`CNTOFF', `40($1)')')
74 IFDOS(`define(`CNTOFF', `104($1)')')
76 ASM_START()
77 TEXT
78 ALIGN(16)
C mp_limb_t mpn_preinv_divrem_1 (qp, fn, np, nn, d, dinv, cnt)
C
C Entry point for callers that already supply dinv and the shift count cnt.
C It does the same register setup as mpn_divrem_1, shifts d left by cnt and
C jumps to L(ent) inside the mpn_divrem_1 body.  All remaining work, including
C popping the four registers pushed here, is done by the code after L(ent).
C
C NOTE(review): un is not tested here and L(ent) loads -8(up,un,8)
C unconditionally, so this path presumably requires nn >= 1 - confirm.
79 PROLOGUE(mpn_preinv_divrem_1)
80 FUNC_ENTRY(4)	C macro defined outside this file; presumably DOS64 argument setup - confirm
81 IFDOS(` mov 56(%rsp), %r8 ')	C DOS64 only: load d from the stack
82 IFDOS(` mov 64(%rsp), %r9 ')	C DOS64 only: load dinv from the stack
83 xor R32(%rax), R32(%rax)	C rax = 0, the value L(ent) shifts the top limb into
84 push %r13	C same four pushes as mpn_divrem_1, popped at L(ret)
85 push %r12
86 push %rbp
87 push %rbx
89 mov fn_param, fn	C copy fn out of rsi before rsi becomes up
90 mov un_param, un	C copy the limb count out of rcx before rcx becomes cnt
91 add fn_param, un_param	C rcx = fn + nn, the total number of quotient limbs
92 mov up_param, up
94 lea -8(qp,un_param,8), qp	C qp = address of the most significant quotient limb
96 mov CNTOFF(%rsp), R8(cnt)	C cl = shift count argument read from the stack
97 shl R8(cnt), d	C d = d shifted left by cnt
98 jmp L(ent)	C continue in the shared body
99 EPILOGUE()
C mp_limb_t mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C                         mp_srcptr np, mp_size_t nn, mp_limb_t d)
C
C Divides the nn-limb number at np by the single limb d.  Quotient limbs are
C stored downwards starting at qp[fn+nn-1].  After the nn source limbs are
C consumed, fn further quotient limbs are produced by the L(ftop) loop using a
C zero low numerator limb.  The remainder is returned in rax.  When fn+nn is
C zero the function returns 0 without storing anything.
C
C Register use in the body:
C   rdi qp    store pointer, moves down 8 bytes per quotient limb
C   rsi up    source pointer
C   rbx un    source limbs still to process
C   r12 fn    extra quotient limbs still to produce
C   r8  d     divisor after the left shift
C   r9  dinv  value returned by mpn_invert_limb for the shifted d
C   rcx       shift count in cl
C   rax       running remainder
C   rbp r10 r11 r13  temporaries
C
C mpn_preinv_divrem_1 jumps into this body at L(ent).
101 ALIGN(16)
102 PROLOGUE(mpn_divrem_1)
103 FUNC_ENTRY(4)	C macro defined outside this file; presumably DOS64 argument setup - confirm
104 IFDOS(` mov 56(%rsp), %r8 ')	C DOS64 only: load d from the stack
105 xor R32(%rax), R32(%rax)	C rax = 0, the return value when there is nothing to do
106 push %r13	C save the four registers restored at L(ret)
107 push %r12
108 push %rbp
109 push %rbx
C Copy the counts out of rsi and rcx because those registers are reused as
C up and cnt.
111 mov fn_param, fn
112 mov un_param, un
113 add fn_param, un_param	C rcx = fn + nn; ZF is set when the total is zero
114 mov up_param, up	C mov leaves the flags of the add intact
115 je L(ret)	C no quotient limbs requested: return rax = 0
117 lea -8(qp,un_param,8), qp	C qp = address of the most significant quotient limb
118 xor R32(%rbp), R32(%rbp)	C rbp = 0
C If the top source limb is below d, the top quotient limb is zero: store it
C and carry that limb forward in rbp as the initial remainder.
120 L(unnormalized):
121 test un, un
122 je L(44)	C no source limbs
123 mov -8(up,un,8), %rax	C rax = most significant source limb
124 cmp d, %rax
125 jae L(44)	C top limb >= d, unsigned: leave it for the main path
126 mov %rbp, (qp)	C top quotient limb = 0
127 mov %rax, %rbp	C initial remainder = top limb
128 lea -8(qp), qp	C step qp down; lea does not change flags
C NOTE(review): mov and lea leave the flags of the cmp above untouched and the
C jae fell through, so ZF looks always clear here and this je never taken -
C confirm.
129 je L(ret)
130 dec un
C Shift d left until its top bit is set and apply the same shift to the
C initial remainder.
131 L(44):
132 bsr d, %rcx	C rcx = index of the highest set bit of d
133 not R32(%rcx)	C low six bits of cl = 63 - index, the count a 64-bit shift uses
134 sal R8(%rcx), d
135 sal R8(%rcx), %rbp
C Call mpn_invert_limb on the shifted d.  rcx and r8, plus rdi and rsi on
C STD64, are saved around the call.  The extra stack adjustment is presumably
C alignment padding on STD64 and shadow space plus padding on DOS64 - confirm.
137 push %rcx
138 IFSTD(` push %rdi ')
139 IFSTD(` push %rsi ')
140 push %r8
141 IFSTD(` sub $8, %rsp ')
142 IFSTD(` mov d, %rdi ')	C STD64: argument in rdi
143 IFDOS(` sub $40, %rsp ')
144 IFDOS(` mov d, %rcx ')	C DOS64: argument in rcx
145 ASSERT(nz, `test $15, %rsp')	C debug check on rsp; its exact meaning depends on the ASSERT macro defined elsewhere
146 CALL( mpn_invert_limb)
147 IFSTD(` add $8, %rsp ')
148 IFDOS(` add $40, %rsp ')
149 pop %r8
150 IFSTD(` pop %rsi ')
151 IFSTD(` pop %rdi ')
152 pop %rcx
154 mov %rax, dinv	C keep the result of mpn_invert_limb
155 mov %rbp, %rax	C rax = shifted initial remainder
156 test un, un
157 je L(frac)	C no source limbs left
C Entry from mpn_preinv_divrem_1, which arrives with rax = 0 and d already
C shifted.
159 L(ent): mov -8(up,un,8), %rbp	C rbp = highest remaining source limb
160 shr R8(%rcx), %rax	C shift rax right by cl, then
161 shld R8(%rcx), %rbp, %rax	C shift it left by cl again, bringing in the top cl bits of rbp
162 sub $2, un	C un = index of the limb below rbp
163 js L(end)	C rbp was the only remaining limb
C Main loop, one quotient limb per iteration.  rax holds the high numerator
C limb, rbp the previously loaded source limb and r10 the newly loaded one;
C shld forms the shifted low numerator limb from them.  A candidate quotient
C is taken from the high half of rax*dinv plus rax plus 1 and is followed by
C two conditional corrections.
165 ALIGN(16)
166 L(top): lea 1(%rax), %r11	C r11 = high numerator limb + 1
167 mul dinv	C rdx:rax = rax * dinv
168 mov (up,un,8), %r10	C r10 = next lower source limb
169 shld R8(%rcx), %r10, %rbp	C rbp = shifted low numerator limb
170 mov %rbp, %r13
171 add %rax, %r13	C r13 = low product half + low numerator limb
172 adc %r11, %rdx	C rdx = candidate quotient
173 mov %rdx, %r11
174 imul d, %rdx
175 sub %rdx, %rbp	C rbp = low limb - candidate * d = candidate remainder
176 lea (d,%rbp), %rax	C rax = candidate remainder + d
177 sub $8, qp
178 cmp %r13, %rbp	C CF = candidate remainder below r13
179 cmovc %rbp, %rax	C CF set: keep the candidate remainder
180 adc $-1, %r11	C CF clear: quotient - 1 to go with the added d
181 cmp d, %rax
182 jae L(ufx)	C remainder still >= d: fix up out of line
183 L(uok): dec un
184 mov %r11, 8(qp)	C store the quotient limb; qp was already stepped down
185 mov %r10, %rbp
186 jns L(top)	C flags come from the dec un above
C Last source limb: the same step, but the low numerator limb is rbp shifted
C left with zeros shifted in.
188 L(end): lea 1(%rax), %r11
189 sal R8(%rcx), %rbp
190 mul dinv
191 add %rbp, %rax
192 adc %r11, %rdx
193 mov %rax, %r11	C r11 = low half of the sum, used for the comparison
194 mov %rdx, %r13	C r13 = candidate quotient
195 imul d, %rdx
196 sub %rdx, %rbp
197 mov d, %rax
198 add %rbp, %rax
199 cmp %r11, %rbp
200 cmovc %rbp, %rax
201 adc $-1, %r13
202 cmp d, %rax
203 jae L(efx)
204 L(eok): mov %r13, (qp)
205 sub $8, qp
206 jmp L(frac)
C Out-of-line corrections: remainder -= d and quotient += 1.
208 L(ufx): sub d, %rax
209 inc %r11
210 jmp L(uok)
211 L(efx): sub d, %rax
212 inc %r13
213 jmp L(eok)
C Produce fn more quotient limbs with a zero low numerator limb.  rbp = -d, so
C the imul in the loop yields -quotient*d, which is the candidate remainder.
215 L(frac):mov d, %rbp
216 neg %rbp
217 jmp L(fent)
219 ALIGN(16) C K8-K10 P6-CNR P6-NHM P4
220 L(ftop):mul dinv C 0,12 0,17 0,17
221 add %r11, %rdx C 5 8 10
222 mov %rax, %r11 C 4 8 3
223 mov %rdx, %r13 C 6 9 11
224 imul %rbp, %rdx C 6 9 11
225 mov d, %rax C
226 add %rdx, %rax C 10 14 14
227 cmp %r11, %rdx C 10 14 14
228 cmovc %rdx, %rax C 11 15 15
229 adc $-1, %r13 C
230 mov %r13, (qp) C
231 sub $8, qp C
232 L(fent):lea 1(%rax), %r11 C
233 dec fn C
234 jns L(ftop) C
236 shr R8(%rcx), %rax	C shift the remainder back right by cl
237 L(ret): pop %rbx
238 pop %rbp
239 pop %r12
240 pop %r13
241 FUNC_EXIT()
242 ret	C return to the caller with the remainder in rax
243 EPILOGUE()