/* mulmod_bnm1.c -- multiplication mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
   mod B^rn - 1, and values are semi-normalised; zero is represented
   as either 0 or B^rn - 1.  Needs a scratch of 2rn limbs at tp.  */
void
mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
                    mp_ptr tp)
{
  mp_limb_t cy;
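  /* Added note: write the 2*rn-limb product as t1*B^rn + t0; since
     B^rn == 1 (mod B^rn - 1), the residue is simply t0 + t1, which the
     code below forms with the carry wrapped back in.  */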
  mpn_mul_n (tp, ap, bp, rn);
  cy = mpn_add_n (rp, tp, tp + rn, rn);
  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
   * be no overflow when adding in the carry. */
  MPN_INCR_U (rp, rn, cy);
}
/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
   semi-normalised representation, computation is mod B^rn + 1.  Needs
   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
   Output is normalised. */
void
mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
                    mp_ptr tp)
{
  mp_limb_t cy;
  mpn_mul_n (tp, ap, bp, rn + 1);
  ASSERT (tp[2*rn+1] == 0);
  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
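  /* Added note: with the product written as u2*B^(2*rn) + u1*B^rn + u0
     and B^rn == -1 (mod B^rn + 1), the residue is u0 - u1 + u2; the
     borrow from the subtraction and the limb u2 are folded in by the
     increment below.  */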
  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
  rp[rn] = 0;
  MPN_INCR_U (rp, rn+1, cy);
}
/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
 *
 * The result is expected to be ZERO if and only if one of the operands
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
 * combine results and obtain a natural number when one knows in
 * advance that the final value is less than (B^rn-1).
 * Moreover it should not be a problem if mulmod_bnm1 is used to
 * compute the full product with an+bn <= rn, because this condition
 * implies (B^an-1)(B^bn-1) < (B^rn-1).
 *
 * Requires 0 < bn <= an <= rn and an + bn > rn/2
 * Scratch need: rn + (need for recursive call OR rn + 4). This gives
 *
 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
 */
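/* Added note: when an + bn <= rn the reduction never triggers, since
   a*b <= (B^an-1)(B^bn-1) < B^(an+bn) - 1 <= B^rn - 1, so the routine
   just returns the exact (an+bn)-limb product.  */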
void
mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an,
                 mp_srcptr bp, mp_size_t bn, mp_ptr tp)
{
  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
      if (UNLIKELY (bn < rn))
        if (UNLIKELY (an + bn <= rn))
          mpn_mul (rp, ap, an, bp, bn);
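        /* Added note: in the remaining case an + bn > rn, so the plain
           product is formed in scratch and its high an + bn - rn limbs
           are folded back onto the low rn limbs, using B^rn == 1.  */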
          mpn_mul (tp, ap, an, bp, bn);
          cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
          MPN_INCR_U (rp, rn, cy);
      else
        mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
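      /* Added note: this branch is reached when bn == rn (hence an == rn),
         i.e. both operands are already full rn-limb residues, so the
         quadratic basecase above applies directly.  */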
      /* We need at least an + bn >= n, to be able to fit one of the
         recursive products at rp.  Requiring strict inequality makes
         the code slightly simpler.  If desired, we could avoid this
         restriction by initially halving rn as long as rn is even and
         an + bn <= rn/2. */
      ASSERT (an + bn > n);
      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
         and CRT the two residues together as

         x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
      */
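      /* Added note: here n = rn/2, so B^rn - 1 = (B^n - 1) * (B^n + 1) and
         the two factors are coprime (both odd, differing by 2).  Checking
         the formula: mod B^n - 1 we have B^n == 1, so
         x == -xp + 2*[(xp + xm)/2] == xm; mod B^n + 1 we have B^n == -1,
         so the first term is +xp and the factor (B^n + 1) == 0 kills the
         second term, giving x == xp.  */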
#define a0  ap
#define a1  (ap + n)
#define b0  bp
#define b1  (bp + n)
#define xp  tp  /* 2n + 2 */
      /* am1  maybe in {xp, n} */
      /* bm1  maybe in {xp + n, n} */
#define sp1 (tp + 2*n + 2)
      /* ap1  maybe in {sp1, n + 1} */
      /* bp1  maybe in {sp1 + n + 1, n + 1} */
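      /* Added note: the block below folds each operand mod B^n - 1
         (low half plus high half, carry wrapped around) and recurses to
         produce xm = a*b mod (B^n - 1) at {rp, n}.  */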
            cy = mpn_add (xp, a0, n, a1, an - n);
            MPN_INCR_U (xp, n, cy);
                cy = mpn_add (so, b0, n, b1, bn - n);
                MPN_INCR_U (so, n, cy);
        mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
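      /* Added note: next, each operand is folded mod B^n + 1 (low half
         minus high half, normalised to n+1 limbs) in preparation for the
         second half-length product xp = a*b mod (B^n + 1).  */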
        if (LIKELY (an > n)) {
          cy = mpn_sub (sp1, a0, n, a1, an - n);
          sp1[n] = 0;
          MPN_INCR_U (sp1, n + 1, cy);
          if (LIKELY (bn > n)) {
            cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
            sp1[2*n+1] = 0;
            MPN_INCR_U (sp1 + n + 1, n + 1, cy);
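        /* Added note: mpn_mul_fft computes a product mod B^n + 1 but needs
           n divisible by 2^k; the loop below lowers the candidate transform
           order k until that holds, and the FFT is used only when k is at
           least FFT_FIRST_K, the smallest order the FFT code supports.  */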
        if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
            k = mpn_fft_best_k (n, 0);
            while (n & mask) {k--; mask >>= 1;};
        if (k >= FFT_FIRST_K)
          xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
        else if (UNLIKELY (bp1 == b0))
            ASSERT (anp + bnp <= 2*n+1);
            ASSERT (anp + bnp > n);
            mpn_mul (xp, ap1, anp, bp1, bnp);
            ASSERT (anp <= n || xp[2*n] == 0);
            cy = mpn_sub (xp, xp, n, xp + n, anp);
            xp[n] = 0;
            MPN_INCR_U (xp, n+1, cy);
        else
          mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
      /* Here the CRT recomposition begins.

         xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
         Division by 2 is a bitwise rotation.

         Assumes xp normalised mod (B^n+1).

         The residue class [0] is represented by [B^n-1]; except when
         both inputs are ZERO.
      */
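      /* Added note: B^n - 1 is odd, so 1/2 == B^n/2 (mod B^n - 1) and the
         halving is a right rotation by one bit: the bit shifted out at the
         bottom re-enters at the top, which is what the rsh1add/rshift code
         below implements, keeping the extra top bit in hi.  */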
#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
#if HAVE_NATIVE_mpn_rsh1add_nc
      cy = mpn_rsh1add_nc (rp, rp, xp, n, xp[n]); /* B^n = 1 */
      hi = cy << (GMP_NUMB_BITS - 1);
      cy = 0;
      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
         overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
      cy = xp[n] + mpn_rsh1add_n (rp, rp, xp, n); /* B^n = 1 */
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
         the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
#if GMP_NAIL_BITS == 0
      add_ssaaaa (cy, rp[n-1], cy, rp[n-1], 0, hi);
#else
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
      rp[n-1] ^= hi;
#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
      cy = mpn_add_nc (rp, rp, xp, n, xp[n]);
#else /* ! _nc */
      cy = xp[n] + mpn_add_n (rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
#endif
      cy += (rp[0] & 1);
      mpn_rshift (rp, rp, n, 1);
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* We can have cy != 0 only if hi = 0... */
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
      rp[n-1] |= hi;
      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
#endif
      /* Next increment can not overflow, read the previous comments about cy. */
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
      MPN_INCR_U (rp, n, cy);
      /* Compute the highest half:
         ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
       */
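      /* Added note: rewriting the CRT formula as
         x = s + (s - xp) * B^n  with  s = (xp + xm)/2 mod (B^n - 1),
         the low n limbs of the result are s (already at rp) and the high
         limbs are s - xp mod (B^n - 1), computed by the subtractions
         below.  */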
      if (UNLIKELY (an + bn < rn))
          /* Note that in this case, the only way the result can equal
             zero mod B^{rn} - 1 is if one of the inputs is zero, and
             then the output of both the recursive calls and this CRT
             reconstruction is zero, not B^{rn} - 1. Which is good,
             since the latter representation doesn't fit in the output
             area. */
          cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
          /* FIXME: This subtraction of the high parts is not really
             necessary, we do it to get the carry out, and for sanity
             checking. */
          cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
                                   xp + an + bn - n, rn - (an + bn), cy);
          ASSERT (an + bn == rn - 1 ||
                  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
          cy = mpn_sub_1 (rp, rp, an + bn, cy);
          ASSERT (cy == (xp + an + bn - n)[0]);
          cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
          /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
             DECR will affect _at most_ the lowest n limbs. */
          MPN_DECR_U (rp, 2*n, cy);
mp_size_t
mpn_mulmod_bnm1_next_size (mp_size_t n)
{
  mp_size_t nh;
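  /* Added note: returns a size rn >= n that mpn_mulmod_bnm1 handles
     efficiently: below the threshold any size works; moderate sizes are
     rounded up to a multiple of 2, 4 or 8 so the recursion can split
     evenly; larger sizes are chosen so that the half size suits
     mpn_mul_fft.  */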
  if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
    return n;
  if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (2-1)) & (-2);
  if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (4-1)) & (-4);
  nh = (n + 1) >> 1;

  if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
    return (n + (8-1)) & (-8);
  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
}
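
/* Added illustration, not part of the original file: a minimal sketch of how
   an internal caller might drive the routines above.  It assumes the TMP_*
   allocation macros from gmp-impl.h and uses the 2*rn + 4 scratch bound from
   the comment before mpn_mulmod_bnm1 (gmp-impl.h also provides an
   mpn_mulmod_bnm1_itch macro).  The function name is hypothetical, and the
   caller must still guarantee 0 < bn <= an <= rn and an + bn > rn/2, with
   {rp, MIN (rn, an + bn)} writable.  */
static void
example_mulmod_bnm1 (mp_ptr rp, mp_size_t rn_min,
                     mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_size_t rn;
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;
  /* Round the requested size up to one the algorithm handles well.  */
  rn = mpn_mulmod_bnm1_next_size (rn_min);
  /* Scratch: S(rn) <= 2*rn + 4 limbs, per the analysis above.  */
  tp = TMP_ALLOC_LIMBS (2 * rn + 4);
  /* {rp, MIN (rn, an + bn)} <- {ap, an} * {bp, bn} mod (B^rn - 1).  */
  mpn_mulmod_bnm1 (rp, rn, ap, an, bp, bn, tp);
  TMP_FREE;
}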