1 /* mpn_mod_1s_4p (ap, n, b, cps)
2 Divide (ap,,n) by b. Return the single-limb remainder.
3 Requires that b < B / 4.
5 Contributed to the GNU project by Torbjorn Granlund.
6 Based on a suggestion by Peter L. Montgomery.
8 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
9 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
10 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
12 Copyright 2008-2010 Free Software Foundation, Inc.
14 This file is part of the GNU MP Library.
16 The GNU MP Library is free software; you can redistribute it and/or modify
17 it under the terms of either:
19 * the GNU Lesser General Public License as published by the Free
20 Software Foundation; either version 3 of the License, or (at your
21 option) any later version.
25 * the GNU General Public License as published by the Free Software
26 Foundation; either version 2 of the License, or (at your option) any
later version.

29 or both in parallel, as here.
31 The GNU MP Library is distributed in the hope that it will be useful, but
32 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
33 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

36 You should have received copies of the GNU General Public License and the
37 GNU Lesser General Public License along with the GNU MP Library. If not,
38 see https://www.gnu.org/licenses/. */
44 #include "mpn/sparc64/sparc64.h"
/* mpn_mod_1s_4p_cps: precompute the 7-entry table cps[] consumed by
   mpn_mod_1s_4p.  From the visible statements, cps[2..6] receive
   (B^k mod b) >> cnt for k = 1..5, where B = 2^GMP_LIMB_BITS.
   NOTE(review): this chunk is an extraction with lines elided -- the
   embedded source-line numbers jump (47->50, 53->55, 55->63, 77->83) --
   so the return type, opening brace, the declarations of cnt/bi/i, the
   normalization of b, and the computation of the inverse bi (presumably
   via invert_limb, stored in cps[0] with cnt in cps[1]) are NOT visible
   here; confirm against the full file before editing. */
47 mpn_mod_1s_4p_cps (mp_limb_t cps
[7], mp_limb_t b
)
/* Powers of B modulo b, kept in the normalized (left-shifted) domain. */
50 mp_limb_t B1modb
, B2modb
, B3modb
, B4modb
, B5modb
;
/* The algorithm requires b < B/4 so the four-way limb folding in
   mpn_mod_1s_4p cannot overflow a two-limb accumulator. */
53 ASSERT (b
<= (~(mp_limb_t
) 0) / 4);
/* cnt = number of leading zero bits of b, used to normalize b.
   NOTE(review): cnt's declaration is in an elided line. */
55 count_leading_zeros (cnt
, b
);
/* First power of B modulo the normalized b, derived from the
   precomputed inverse bi (bi's computation is in an elided line). */
63 B1modb
= -b
* ((bi
>> (GMP_LIMB_BITS
-cnt
)) | (CNST_LIMB(1) << cnt
));
64 ASSERT (B1modb
<= b
); /* NB: not fully reduced mod b */
/* Store each power shifted back down by cnt (un-normalized). */
65 cps
[2] = B1modb
>> cnt
;
/* Each udiv_rnnd_preinv step computes the next power:
   B^(k+1) mod b = (B * (B^k mod b)) mod b, using the inverse bi. */
67 udiv_rnnd_preinv (B2modb
, B1modb
, CNST_LIMB(0), b
, bi
);
68 cps
[3] = B2modb
>> cnt
;
70 udiv_rnnd_preinv (B3modb
, B2modb
, CNST_LIMB(0), b
, bi
);
71 cps
[4] = B3modb
>> cnt
;
73 udiv_rnnd_preinv (B4modb
, B3modb
, CNST_LIMB(0), b
, bi
);
74 cps
[5] = B4modb
>> cnt
;
76 udiv_rnnd_preinv (B5modb
, B4modb
, CNST_LIMB(0), b
, bi
);
77 cps
[6] = B5modb
>> cnt
;
/* NOTE(review): the body of this loop over cps[3..6] is elided in this
   chunk (line numbers jump 77->83), as is the function's closing brace;
   confirm its contents against the full file. */
83 for (i
= 3; i
<= 6; i
++)
/* mpn_mod_1s_4p: return the remainder of {ap, n} modulo b, using the
   table cps[] precomputed by mpn_mod_1s_4p_cps.  The main loop folds
   four limbs per iteration: each limb is multiplied by the matching
   precomputed power of B modulo b and the two-limb products are summed
   into the running residue (rh,rl).
   NOTE(review): this chunk is an extraction with lines elided (embedded
   line numbers jump: 96->113, 118->132, 135->140, 166->173, 226->232,
   ...), so the opening brace, the unpacking of bi/cnt and
   B1modb..B5modb from cps[], the switch on n mod 4 that selects an
   entry point, the loop braces, and the trailing `return` are NOT
   visible here.  The folding code also appears twice, once written with
   umul_ppmm_s and once with the generic umul_ppmm, presumably two
   conditionally-compiled variants -- confirm against the full file. */
93 mpn_mod_1s_4p (mp_srcptr ap
, mp_size_t n
, mp_limb_t b
, const mp_limb_t cps
[7])
/* (rh,rl): two-limb running residue; (ph,pl)/(ch,cl): partial-product
   scratch; bi: reciprocal of b (presumably loaded from cps[] in an
   elided line -- confirm); r: final single-limb result. */
95 mp_limb_t rh
, rl
, bi
, ph
, pl
, ch
, cl
, r
;
/* B^1..B^5 mod b; presumably loaded from cps[2..6] in elided lines. */
96 mp_limb_t B1modb
, B2modb
, B3modb
, B4modb
, B5modb
;
/* Entry point consuming the top FOUR limbs ap[n-4..n-1] (by the visible
   indices, the branch taken when n is a multiple of 4). */
113 umul_ppmm_s (ph
, pl
, ap
[n
- 3], B1modb
);
114 add_ssaaaa (ph
, pl
, ph
, pl
, CNST_LIMB(0), ap
[n
- 4]);
115 umul_ppmm_s (ch
, cl
, ap
[n
- 2], B2modb
);
116 add_ssaaaa (ph
, pl
, ph
, pl
, ch
, cl
);
117 umul_ppmm_s (rh
, rl
, ap
[n
- 1], B3modb
);
118 add_ssaaaa (rh
, rl
, rh
, rl
, ph
, pl
);
/* Entry point consuming the top THREE limbs ap[n-3..n-1].
   NOTE(review): the switch/case labels joining these entry points are
   in elided lines. */
132 umul_ppmm_s (ph
, pl
, ap
[n
- 2], B1modb
);
133 add_ssaaaa (ph
, pl
, ph
, pl
, CNST_LIMB(0), ap
[n
- 3]);
134 umul_ppmm_s (rh
, rl
, ap
[n
- 1], B2modb
);
135 add_ssaaaa (rh
, rl
, rh
, rl
, ph
, pl
);
/* Main loop: fold four more low limbs plus the current residue (rh,rl).
   The bare `+ ap[i+...]` lines below are the remains of the original
   bound-analysis comment whose /* delimiters were lost in extraction. */
140 for (i
= n
- 4; i
>= 0; i
-= 4)
143 + ap[i+1] * (B mod b) <= (B-1)(b-1)
144 + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
145 + ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
146 + LO(rr) * (B^4 mod b) <= (B-1)(b-1)
147 + HI(rr) * (B^5 mod b) <= (B-1)(b-1)
149 umul_ppmm_s (ph
, pl
, ap
[i
+ 1], B1modb
);
150 add_ssaaaa (ph
, pl
, ph
, pl
, CNST_LIMB(0), ap
[i
+ 0]);
152 umul_ppmm_s (ch
, cl
, ap
[i
+ 2], B2modb
);
153 add_ssaaaa (ph
, pl
, ph
, pl
, ch
, cl
);
155 umul_ppmm_s (ch
, cl
, ap
[i
+ 3], B3modb
);
156 add_ssaaaa (ph
, pl
, ph
, pl
, ch
, cl
);
158 umul_ppmm_s (ch
, cl
, rl
, B4modb
);
159 add_ssaaaa (ph
, pl
, ph
, pl
, ch
, cl
);
161 umul_ppmm_s (rh
, rl
, rh
, B5modb
);
162 add_ssaaaa (rh
, rl
, rh
, rl
, ph
, pl
);
/* After the loop: fold the high residue limb once more via B mod b so
   the result fits the final single division below. */
165 umul_ppmm_s (rh
, cl
, rh
, B1modb
);
166 add_ssaaaa (rh
, rl
, rh
, rl
, CNST_LIMB(0), cl
);
/* Second copy of the same algorithm using the generic umul_ppmm macro
   and literal 0 instead of CNST_LIMB(0); presumably the alternative
   branch of a conditional-compilation block (its #if/#else lines are
   elided) -- confirm against the full file. */
173 umul_ppmm (ph
, pl
, ap
[n
- 3], B1modb
);
174 add_ssaaaa (ph
, pl
, ph
, pl
, 0, ap
[n
- 4]);
175 umul_ppmm (ch
, cl
, ap
[n
- 2], B2modb
);
176 add_ssaaaa (ph
, pl
, ph
, pl
, ch
, cl
);
177 umul_ppmm (rh
, rl
, ap
[n
- 1], B3modb
);
178 add_ssaaaa (rh
, rl
, rh
, rl
, ph
, pl
);
192 umul_ppmm (ph
, pl
, ap
[n
- 2], B1modb
);
193 add_ssaaaa (ph
, pl
, ph
, pl
, 0, ap
[n
- 3]);
194 umul_ppmm (rh
, rl
, ap
[n
- 1], B2modb
);
195 add_ssaaaa (rh
, rl
, rh
, rl
, ph
, pl
);
200 for (i
= n
- 4; i
>= 0; i
-= 4)
203 + ap[i+1] * (B mod b) <= (B-1)(b-1)
204 + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
205 + ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
206 + LO(rr) * (B^4 mod b) <= (B-1)(b-1)
207 + HI(rr) * (B^5 mod b) <= (B-1)(b-1)
209 umul_ppmm (ph
, pl
, ap
[i
+ 1], B1modb
);
210 add_ssaaaa (ph
, pl
, ph
, pl
, 0, ap
[i
+ 0]);
212 umul_ppmm (ch
, cl
, ap
[i
+ 2], B2modb
);
213 add_ssaaaa (ph
, pl
, ph
, pl
, ch
, cl
);
215 umul_ppmm (ch
, cl
, ap
[i
+ 3], B3modb
);
216 add_ssaaaa (ph
, pl
, ph
, pl
, ch
, cl
);
218 umul_ppmm (ch
, cl
, rl
, B4modb
);
219 add_ssaaaa (ph
, pl
, ph
, pl
, ch
, cl
);
221 umul_ppmm (rh
, rl
, rh
, B5modb
);
222 add_ssaaaa (rh
, rl
, rh
, rl
, ph
, pl
);
225 umul_ppmm (rh
, cl
, rh
, B1modb
);
226 add_ssaaaa (rh
, rl
, rh
, rl
, 0, cl
);
/* Final reduction: normalize the two-limb residue by cnt (so it is in
   the same shifted domain as b and bi were precomputed in), then one
   udiv_rnnd_preinv leaves the remainder in r.
   NOTE(review): the trailing `return r >> cnt;` (or equivalent) and the
   closing brace lie past the end of this chunk -- confirm. */
232 r
= (rh
<< cnt
) | (rl
>> (GMP_LIMB_BITS
- cnt
));
233 udiv_rnnd_preinv (r
, r
, rl
<< cnt
, b
, bi
);