1 /* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
3 Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
21 or both in parallel, as here.
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library. If not,
30 see https://www.gnu.org/licenses/. */
36 #include "mpn/sparc64/sparc64.h"
39 /* 64-bit divisor 32-bit divisor
40 cycles/limb cycles/limb
42 integer fraction integer fraction
43 Ultrasparc 2i: 160 160 122 96
47 /* 32-bit divisors are treated in special case code. This requires 4 mulx
48 per limb instead of 8 in the general case.
50 For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
51 addressing, to get the two halves of each limb read in the correct order.
52 This is kept in an adj variable. Doing that measures about 4 c/l faster
53 than just writing HALF_ENDIAN_ADJ(i) in the integer loop. The latter
54 shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
55 (on gcc 3.2.1 at least). The fraction loop doesn't seem affected, but we
56 still use a variable since that ought to work out best. */
59 mpn_divrem_1 (mp_ptr qp_limbptr
, mp_size_t xsize_limbs
,
60 mp_srcptr ap_limbptr
, mp_size_t size_limbs
, mp_limb_t d_limb
)
62 mp_size_t total_size_limbs
;
65 ASSERT (xsize_limbs
>= 0);
66 ASSERT (size_limbs
>= 0);
68 /* FIXME: What's the correct overlap rule when xsize!=0? */
69 ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr
+ xsize_limbs
,
70 ap_limbptr
, size_limbs
));
72 total_size_limbs
= size_limbs
+ xsize_limbs
;
73 if (UNLIKELY (total_size_limbs
== 0))
76 /* udivx is good for total_size==1, and no need to bother checking
77 limb<divisor, since if that's likely the caller should check */
78 if (UNLIKELY (total_size_limbs
== 1))
81 a
= (LIKELY (size_limbs
!= 0) ? ap_limbptr
[0] : 0);
87 if (d_limb
<= CNST_LIMB(0xFFFFFFFF))
89 mp_size_t size
, xsize
, total_size
, adj
;
90 unsigned *qp
, n1
, n0
, q
, r
, nshift
, norm_rmask
;
93 int norm
, norm_rshift
;
95 size
= 2 * size_limbs
;
96 xsize
= 2 * xsize_limbs
;
97 total_size
= size
+ xsize
;
99 ap
= (unsigned *) ap_limbptr
;
100 qp
= (unsigned *) qp_limbptr
;
103 r
= 0; /* initial remainder */
105 if (LIKELY (size
!= 0))
107 n1
= ap
[size
-1 + HALF_ENDIAN_ADJ(1)];
109 /* If the length of the source is uniformly distributed, then
110 there's a 50% chance of the high 32-bits being zero, which we
114 n1
= ap
[size
-2 + HALF_ENDIAN_ADJ(0)];
117 ASSERT (size
> 0); /* because always even */
118 qp
[size
+ HALF_ENDIAN_ADJ(1)] = 0;
121 /* Skip a division if high < divisor (high quotient 0). Testing
122 here before before normalizing will still skip as often as
128 qp
[size
+ HALF_ENDIAN_ADJ(size
)] = 0;
135 count_leading_zeros_32 (norm
, d_limb
);
140 norm_rshift
= 32 - norm
;
141 norm_rmask
= (norm
== 0 ? 0 : 0xFFFFFFFF);
143 invert_half_limb (dinv_limb
, d_limb
);
145 if (LIKELY (size
!= 0))
148 adj
= HALF_ENDIAN_ADJ (i
);
151 r
|= ((n1
>> norm_rshift
) & norm_rmask
);
156 nshift
= (n1
<< norm
) | ((n0
>> norm_rshift
) & norm_rmask
);
157 udiv_qrnnd_half_preinv (q
, r
, r
, nshift
, d_limb
, dinv_limb
);
162 udiv_qrnnd_half_preinv (q
, r
, r
, nshift
, d_limb
, dinv_limb
);
163 qp
[0 + HALF_ENDIAN_ADJ(0)] = q
;
166 adj
= HALF_ENDIAN_ADJ (0);
167 for (i
= xsize
-1; i
>= 0; i
--)
169 udiv_qrnnd_half_preinv (q
, r
, r
, 0, d_limb
, dinv_limb
);
180 mp_size_t size
, xsize
, total_size
;
181 mp_limb_t d
, n1
, n0
, q
, r
, dinv
, nshift
, norm_rmask
;
182 int norm
, norm_rshift
;
188 total_size
= total_size_limbs
;
191 qp
+= total_size
; /* above high limb */
192 r
= 0; /* initial remainder */
194 if (LIKELY (size
!= 0))
196 /* Skip a division if high < divisor (high quotient 0). Testing
197 here before before normalizing will still skip as often as
211 count_leading_zeros (norm
, d
);
215 norm_rshift
= GMP_LIMB_BITS
- norm
;
216 norm_rmask
= (norm
== 0 ? 0 : ~CNST_LIMB(0));
218 invert_limb (dinv
, d
);
220 if (LIKELY (size
!= 0))
223 r
|= ((n1
>> norm_rshift
) & norm_rmask
);
224 for (i
= size
-2; i
>= 0; i
--)
227 nshift
= (n1
<< norm
) | ((n0
>> norm_rshift
) & norm_rmask
);
228 udiv_qrnnd_preinv (q
, r
, r
, nshift
, d
, dinv
);
233 udiv_qrnnd_preinv (q
, r
, r
, nshift
, d
, dinv
);
236 for (i
= 0; i
< xsize
; i
++)
238 udiv_qrnnd_preinv (q
, r
, r
, CNST_LIMB(0), d
, dinv
);