/* mpn_mod_1s_4p (ap, n, b, cps)
   Divide {ap, n} by b.  Return the single-limb remainder.
   Requires that b < B / 4.

   Contributed to the GNU project by Torbjorn Granlund.
   Based on a suggestion by Peter L. Montgomery.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008-2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
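
/* Usage sketch (illustrative only; `pre', `r', `ap', `n' and `b' are
   placeholder names, not part of this file): the precomputed data for a
   divisor is built once and then reused for any number of reductions.

       mp_limb_t pre[7], r;

       ASSERT (b <= (~(mp_limb_t) 0) / 4);
       mpn_mod_1s_4p_cps (pre, b);
       r = mpn_mod_1s_4p (ap, n, b, pre);

   after which r == {ap, n} mod b.  GMP's mpn_mod_1 uses this same
   two-step pattern when it dispatches to these routines.  */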

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/sparc64/sparc64.h"
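
/* The seven-limb cps[] array filled in by mpn_mod_1s_4p_cps below holds
   (as can be read off the code; B denotes 2^GMP_LIMB_BITS):

     cps[0]   invert_limb () of the divisor shifted left cps[1] bits
     cps[1]   cnt, the count of leading zero bits of the divisor
     cps[2]   B   mod b   (possibly not fully reduced; it may equal b)
     cps[3]   B^2 mod b
     cps[4]   B^3 mod b
     cps[5]   B^4 mod b
     cps[6]   B^5 mod b  */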

void
mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
{
  mp_limb_t bi;
  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
  int cnt;

  ASSERT (b <= (~(mp_limb_t) 0) / 4);

  count_leading_zeros (cnt, b);

  b <<= cnt;
  invert_limb (bi, b);

  cps[0] = bi;
  cps[1] = cnt;

  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
  ASSERT (B1modb <= b);         /* NB: not fully reduced mod b */
  cps[2] = B1modb >> cnt;

  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
  cps[3] = B2modb >> cnt;

  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
  cps[4] = B3modb >> cnt;

  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
  cps[5] = B4modb >> cnt;

  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
  cps[6] = B5modb >> cnt;
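
  /* The assertion block below checks that the five stored residues in
     cps[2..6] sum without a carry out of a single limb.  */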
#if WANT_ASSERT
  {
    int i;
    b = cps[2];
    for (i = 3; i <= 6; i++)
      {
        b += cps[i];
        ASSERT (b >= cps[i]);
      }
  }
#endif
}
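
/* mpn_mod_1s_4p evaluates {ap, n} as a polynomial in B, absorbing four
   limbs a0..a3 into a two-limb accumulator (rh, rl) per pass, using the
   congruence

     rh*B^5 + rl*B^4 + a3*B^3 + a2*B^2 + a1*B + a0
       == rh*(B^5 mod b) + rl*(B^4 mod b) + a3*(B^3 mod b)
          + a2*(B^2 mod b) + a1*(B mod b) + a0            (mod b)

   with the B^k mod b factors taken from cps[].  */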

mp_limb_t
mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
{
  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
  mp_size_t i;
  int cnt;

  ASSERT (n >= 1);

  B1modb = cps[2];
  B2modb = cps[3];
  B3modb = cps[4];
  B4modb = cps[5];
  B5modb = cps[6];
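
  /* The reduction is written out twice: when the divisor fits in 32 bits,
     every precomputed residue does too, and the loops can use the
     umul_ppmm_s multiply from sparc64.h instead of the generic umul_ppmm.
     Each branch first peels off the leading 1 to 4 limbs so that the main
     loop can absorb exactly four limbs per iteration.  */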

  if ((b >> 32) == 0)
    {
      switch (n & 3)
        {
        case 0:
          umul_ppmm_s (ph, pl, ap[n - 3], B1modb);
          add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
          umul_ppmm_s (ch, cl, ap[n - 2], B2modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);
          umul_ppmm_s (rh, rl, ap[n - 1], B3modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
          n -= 4;
          break;
        case 1:
          rh = 0;
          rl = ap[n - 1];
          n -= 1;
          break;
        case 2:
          rh = ap[n - 1];
          rl = ap[n - 2];
          n -= 2;
          break;
        case 3:
          umul_ppmm_s (ph, pl, ap[n - 2], B1modb);
          add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
          umul_ppmm_s (rh, rl, ap[n - 1], B2modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
          n -= 3;
          break;
        }

      for (i = n - 4; i >= 0; i -= 4)
        {
          /* rr = ap[i]                         < B
                + ap[i+1] * (B mod b)           <= (B-1)(b-1)
                + ap[i+2] * (B^2 mod b)         <= (B-1)(b-1)
                + ap[i+3] * (B^3 mod b)         <= (B-1)(b-1)
                + LO(rr)  * (B^4 mod b)         <= (B-1)(b-1)
                + HI(rr)  * (B^5 mod b)         <= (B-1)(b-1)
          */
          umul_ppmm_s (ph, pl, ap[i + 1], B1modb);
          add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);

          umul_ppmm_s (ch, cl, ap[i + 2], B2modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm_s (ch, cl, ap[i + 3], B3modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm_s (ch, cl, rl, B4modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm_s (rh, rl, rh, B5modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
        }

      /* One more folding step: rh*B + rl == rh*(B mod b) + rl (mod b).  */
      umul_ppmm_s (rh, cl, rh, B1modb);
      add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
    }
  else
    {
      switch (n & 3)
        {
        case 0:
          umul_ppmm (ph, pl, ap[n - 3], B1modb);
          add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
          umul_ppmm (ch, cl, ap[n - 2], B2modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);
          umul_ppmm (rh, rl, ap[n - 1], B3modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
          n -= 4;
          break;
        case 1:
          rh = 0;
          rl = ap[n - 1];
          n -= 1;
          break;
        case 2:
          rh = ap[n - 1];
          rl = ap[n - 2];
          n -= 2;
          break;
        case 3:
          umul_ppmm (ph, pl, ap[n - 2], B1modb);
          add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
          umul_ppmm (rh, rl, ap[n - 1], B2modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
          n -= 3;
          break;
        }

      for (i = n - 4; i >= 0; i -= 4)
        {
          /* rr = ap[i]                         < B
                + ap[i+1] * (B mod b)           <= (B-1)(b-1)
                + ap[i+2] * (B^2 mod b)         <= (B-1)(b-1)
                + ap[i+3] * (B^3 mod b)         <= (B-1)(b-1)
                + LO(rr)  * (B^4 mod b)         <= (B-1)(b-1)
                + HI(rr)  * (B^5 mod b)         <= (B-1)(b-1)
          */
          umul_ppmm (ph, pl, ap[i + 1], B1modb);
          add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);

          umul_ppmm (ch, cl, ap[i + 2], B2modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm (ch, cl, ap[i + 3], B3modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm (ch, cl, rl, B4modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm (rh, rl, rh, B5modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
        }

      /* Same folding step as in the 32-bit branch above.  */
      umul_ppmm (rh, cl, rh, B1modb);
      add_ssaaaa (rh, rl, rh, rl, 0, cl);
    }

  bi = cps[0];
  cnt = cps[1];
  b <<= cnt;            /* normalize b to match the precomputed inverse bi */

  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);

  return r >> cnt;
}