/* __mpn_mul_n -- Multiply two natural numbers of length n.

Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
License for more details.

You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
#include "gmp.h"
#include "gmp-impl.h"
/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
   always stored.  Return the most significant limb.

   Argument constraints:
   1. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
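/* Illustration (an added sketch, not part of the original comment): for
   SIZE = 2, a call __mpn_mul_n (prodp, up, vp, 2) with up = {u0, u1} and
   vp = {v0, v1} stores the four limbs of (u0 + u1*B)*(v0 + v1*B) at
   prodp[0..3], least significant limb first, where B = 2**BITS_PER_MP_LIMB;
   prodp must not overlap up or vp.  */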
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
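/* For example (an added note): a machine-specific configuration could
   select an earlier crossover by defining, before this point,

     #define KARATSUBA_THRESHOLD 16

   The clamp above exists because the Karatsuba code needs at least one
   limb in each half, so any smaller setting is raised to 2.  */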
void
#if __STDC__
____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#else
____mpn_mul_n ();
#endif
/* Handle simple cases with traditional multiplication.

   This is the most critical code of multiplication.  All multiplies rely
   on this, both small and huge.  Small ones arrive here immediately.  Huge
   ones arrive here as this is the base case for Karatsuba's recursive
   algorithm below.  */
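/* Sketch of the scheme used below (an added illustration): for size = 3,

     prodp[0..3]  = U * vp[0]    (__mpn_mul_1, carry limb in prodp[3])
     prodp[1..4] += U * vp[1]    (__mpn_addmul_1)
     prodp[2..5] += U * vp[2]    (__mpn_addmul_1)

   one schoolbook row per limb of V, shifted one limb per iteration by the
   prodp++ below, for O(size**2) limb multiplications in total.  */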
void
#if __STDC__
____mpn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
____mpn_mul_n_basecase (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb cy_limb;
  mp_limb v_limb;

  /* Multiply by the first limb in V separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration in the outer loop, multiply the whole of U by one
     limb from V, and add it to PROD.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = __mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}
void
#if __STDC__
____mpn_mul_n (mp_ptr prodp,
               mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
____mpn_mul_n (prodp, up, vp, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the Karatsuba code below doesn't handle that.
         Multiply the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of S1 and S2
         separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */
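      /* Spelled out (an added sketch, not original commentary): let
         e = size - 1, B = 2**BITS_PER_MP_LIMB, U = U' + u[e]*B**e and
         V = V' + v[e]*B**e, where U' and V' are the low e limbs.  The
         three steps below compute

           U*V = U'*V' + (U' * v[e])*B**e + (V * u[e])*B**e,

         which is exact because the full V in the last term supplies the
         u[e]*v[e]*B**(2e) cross term.  */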
      mp_size_t esize = size - 1;	/* even size */
      mp_limb cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = __mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

         Split U in two pieces, U1 and U0, such that
           U = U0 + U1*(B**n),
         and V in V1 and V0, such that
           V = V0 + V1*(B**n).

         UV is then computed recursively using the identity

           UV = (B**(2n) + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1)
                + (B**n + 1)*U0*V0,

         where B = 2**BITS_PER_MP_LIMB.  */
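      /* A worked instance of the identity (an added illustration): with
         B = 10 and n = 1, take U = 47 and V = 83, so U1 = 4, U0 = 7,
         V1 = 8, V0 = 3.  Then

           (100 + 10)*4*8 + 10*(4-7)*(3-8) + (10 + 1)*7*3
             = 3520 + 150 + 231 = 3901 = 47*83.

         Only three half-size multiplications (4*8, 3*5, 7*3) are needed
         instead of four, which yields the O(size**lg 3), roughly
         O(size**1.585), running time of the recursion.  */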
      mp_size_t hsize = size >> 1;
      mp_limb cy;
      int negflg;

      /*** Product H.    ________________  ________________
                        |_____U1 x V1____||____U0 x V0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M.    ________________
                        |_(U1-U0)(V0-V1)_|  */
      if (__mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          __mpn_sub_n (prodp, up + hsize, up, hsize);
          negflg = 0;
        }
      else
        {
          __mpn_sub_n (prodp, up, up + hsize, hsize);
          negflg = 1;
        }
      if (__mpn_cmp (vp + hsize, vp, hsize) >= 0)
        {
          __mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
          negflg ^= 1;
        }
      else
        {
          __mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
          /* No change of NEGFLG.  */
        }
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (if NEGFLG is set, M is a negative number).  */
      if (negflg)
        cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
        cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.    ________________  ________________
                        |________________||____U0 x V0_____|  */
      /* Compute the low product directly from the low halves of U and V.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
        __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        __mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
void
#if __STDC__
____mpn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
#else
____mpn_sqr_n_basecase (prodp, up, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb cy_limb;
  mp_limb v_limb;

  /* Multiply by the first limb of U separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = up[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration in the outer loop, multiply the whole of U by one
     of its own limbs, and add it to PROD.  */
  for (i = 1; i < size; i++)
    {
      v_limb = up[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = __mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}
void
#if __STDC__
____mpn_sqr_n (mp_ptr prodp,
               mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
____mpn_sqr_n (prodp, up, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the Karatsuba code below doesn't handle that.
         Square the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of the operand
         separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */
      mp_size_t esize = size - 1;	/* even size */
      mp_limb cy_limb;

      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
      cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = __mpn_addmul_1 (prodp + esize, up, size, up[esize]);
      prodp[esize + size] = cy_limb;
    }
  else
    {
      mp_size_t hsize = size >> 1;
      mp_limb cy;

      /*** Product H.    ________________  ________________
                        |_____U1 x U1____||____U0 x U0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
      /*** Product M.    ________________
                        |_(U1-U0)(U0-U1)_|  */
      if (__mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          __mpn_sub_n (prodp, up + hsize, up, hsize);
        }
      else
        {
          __mpn_sub_n (prodp, up, up + hsize, hsize);
        }
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Subtract product M.  When squaring, M = (U1-U0)*(U0-U1) =
         -(U1-U0)**2 is never positive, so no sign flag is needed and we
         always subtract the square computed above.  */
      cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      /*** Product L.    ________________  ________________
                        |________________||____U0 x U0_____|  */
      /* Compute the low product directly from the low half of U.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
        __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        __mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
/* This should be made into an inline function in gmp.h.  */
inline void
#if __STDC__
__mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
__mpn_mul_n (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  if (up == vp)
    {
      if (size < KARATSUBA_THRESHOLD)
        {
          ____mpn_sqr_n_basecase (prodp, up, size);
        }
      else
        {
          mp_ptr tspace;
          tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
          ____mpn_sqr_n (prodp, up, size, tspace);
        }
    }
  else
    {
      if (size < KARATSUBA_THRESHOLD)
        {
          ____mpn_mul_n_basecase (prodp, up, vp, size);
        }
      else
        {
          mp_ptr tspace;
          tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
          ____mpn_mul_n (prodp, up, vp, size, tspace);
        }
    }
}
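
/* A minimal usage sketch (added for illustration; the TEST_MUL_N guard,
   the limb values, and the printing are assumptions, not part of the
   original file): multiply two 4-limb numbers and print the 8-limb
   product, most significant limb first.  */
#ifdef TEST_MUL_N
#include <stdio.h>

int
main (void)
{
  mp_limb u[4] = {1, 2, 3, 4};	/* u = 4*B**3 + 3*B**2 + 2*B + 1 */
  mp_limb v[4] = {5, 6, 7, 8};	/* v = 8*B**3 + 7*B**2 + 6*B + 5 */
  mp_limb prod[8];		/* 2 * SIZE limbs are always stored.  */
  mp_size_t i;

  /* PRODP must be distinct from UP and VP (argument constraint 1).  */
  __mpn_mul_n (prod, u, v, 4);

  for (i = 8; i > 0; i--)
    printf ("%lx ", (unsigned long) prod[i - 1]);
  printf ("\n");
  return 0;
}
#endif /* TEST_MUL_N */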