beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / sparc64 / divrem_1.c
blob531494a94f6c8d35cc6e7301b1680c8e3c8a4ea1
1 /* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
3 Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
4 Inc.
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
19 later version.
21 or both in parallel, as here.
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 for more details.
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library. If not,
30 see https://www.gnu.org/licenses/. */
32 #include "gmp.h"
33 #include "gmp-impl.h"
34 #include "longlong.h"
36 #include "mpn/sparc64/sparc64.h"
/*                  64-bit divisor       32-bit divisor
                      cycles/limb          cycles/limb
                       (approx)             (approx)
                   integer  fraction    integer  fraction
   Ultrasparc 2i:    160      160         122       96
*/
47 /* 32-bit divisors are treated in special case code. This requires 4 mulx
48 per limb instead of 8 in the general case.
50 For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
51 addressing, to get the two halves of each limb read in the correct order.
52 This is kept in an adj variable. Doing that measures about 4 c/l faster
53 than just writing HALF_ENDIAN_ADJ(i) in the integer loop. The latter
54 shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
55 (on gcc 3.2.1 at least). The fraction loop doesn't seem affected, but we
56 still use a variable since that ought to work out best. */
58 mp_limb_t
59 mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
60 mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
62 mp_size_t total_size_limbs;
63 mp_size_t i;
65 ASSERT (xsize_limbs >= 0);
66 ASSERT (size_limbs >= 0);
67 ASSERT (d_limb != 0);
68 /* FIXME: What's the correct overlap rule when xsize!=0? */
69 ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
70 ap_limbptr, size_limbs));
72 total_size_limbs = size_limbs + xsize_limbs;
73 if (UNLIKELY (total_size_limbs == 0))
74 return 0;
76 /* udivx is good for total_size==1, and no need to bother checking
77 limb<divisor, since if that's likely the caller should check */
78 if (UNLIKELY (total_size_limbs == 1))
80 mp_limb_t a, q;
81 a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
82 q = a / d_limb;
83 qp_limbptr[0] = q;
84 return a - q*d_limb;
87 if (d_limb <= CNST_LIMB(0xFFFFFFFF))
89 mp_size_t size, xsize, total_size, adj;
90 unsigned *qp, n1, n0, q, r, nshift, norm_rmask;
91 mp_limb_t dinv_limb;
92 const unsigned *ap;
93 int norm, norm_rshift;
95 size = 2 * size_limbs;
96 xsize = 2 * xsize_limbs;
97 total_size = size + xsize;
99 ap = (unsigned *) ap_limbptr;
100 qp = (unsigned *) qp_limbptr;
102 qp += xsize;
103 r = 0; /* initial remainder */
105 if (LIKELY (size != 0))
107 n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];
109 /* If the length of the source is uniformly distributed, then
110 there's a 50% chance of the high 32-bits being zero, which we
111 can skip. */
112 if (n1 == 0)
114 n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
115 total_size--;
116 size--;
117 ASSERT (size > 0); /* because always even */
118 qp[size + HALF_ENDIAN_ADJ(1)] = 0;
121 /* Skip a division if high < divisor (high quotient 0). Testing
122 here before before normalizing will still skip as often as
123 possible. */
124 if (n1 < d_limb)
126 r = n1;
127 size--;
128 qp[size + HALF_ENDIAN_ADJ(size)] = 0;
129 total_size--;
130 if (total_size == 0)
131 return r;
135 count_leading_zeros_32 (norm, d_limb);
136 norm -= 32;
137 d_limb <<= norm;
138 r <<= norm;
140 norm_rshift = 32 - norm;
141 norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
143 invert_half_limb (dinv_limb, d_limb);
145 if (LIKELY (size != 0))
147 i = size - 1;
148 adj = HALF_ENDIAN_ADJ (i);
149 n1 = ap[i + adj];
150 adj = -adj;
151 r |= ((n1 >> norm_rshift) & norm_rmask);
152 for ( ; i > 0; i--)
154 n0 = ap[i-1 + adj];
155 adj = -adj;
156 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
157 udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
158 qp[i + adj] = q;
159 n1 = n0;
161 nshift = n1 << norm;
162 udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
163 qp[0 + HALF_ENDIAN_ADJ(0)] = q;
165 qp -= xsize;
166 adj = HALF_ENDIAN_ADJ (0);
167 for (i = xsize-1; i >= 0; i--)
169 udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
170 adj = -adj;
171 qp[i + adj] = q;
174 return r >> norm;
176 else
178 mp_srcptr ap;
179 mp_ptr qp;
180 mp_size_t size, xsize, total_size;
181 mp_limb_t d, n1, n0, q, r, dinv, nshift, norm_rmask;
182 int norm, norm_rshift;
184 ap = ap_limbptr;
185 qp = qp_limbptr;
186 size = size_limbs;
187 xsize = xsize_limbs;
188 total_size = total_size_limbs;
189 d = d_limb;
191 qp += total_size; /* above high limb */
192 r = 0; /* initial remainder */
194 if (LIKELY (size != 0))
196 /* Skip a division if high < divisor (high quotient 0). Testing
197 here before before normalizing will still skip as often as
198 possible. */
199 n1 = ap[size-1];
200 if (n1 < d)
202 r = n1;
203 *--qp = 0;
204 total_size--;
205 if (total_size == 0)
206 return r;
207 size--;
211 count_leading_zeros (norm, d);
212 d <<= norm;
213 r <<= norm;
215 norm_rshift = GMP_LIMB_BITS - norm;
216 norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));
218 invert_limb (dinv, d);
220 if (LIKELY (size != 0))
222 n1 = ap[size-1];
223 r |= ((n1 >> norm_rshift) & norm_rmask);
224 for (i = size-2; i >= 0; i--)
226 n0 = ap[i];
227 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
228 udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
229 *--qp = q;
230 n1 = n0;
232 nshift = n1 << norm;
233 udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
234 *--qp = q;
236 for (i = 0; i < xsize; i++)
238 udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
239 *--qp = q;
241 return r >> norm;