/* __mpn_mul_n -- Multiply two natural numbers of length n.

Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
License for more details.

You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
   always stored.  Return the most significant limb.

   Argument constraints:
   1. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
void
#if __STDC__
____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#else
____mpn_mul_n ();
#endif
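/* Note (added): MPN_MUL_N_RECURSE and MPN_SQR_N_RECURSE are used below but
   not defined in this file; they are assumed to come from gmp-impl.h.  A
   minimal sketch of the expected dispatch, under that assumption (the real
   definitions may differ):

   #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)        \
     do                                                           \
       {                                                          \
         if ((size) < KARATSUBA_THRESHOLD)                        \
           ____mpn_mul_n_basecase (prodp, up, vp, size);          \
         else                                                     \
           ____mpn_mul_n (prodp, up, vp, size, tspace);           \
       }                                                          \
     while (0)

   MPN_SQR_N_RECURSE would be the analogous dispatch to
   ____mpn_sqr_n_basecase and ____mpn_sqr_n.  */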
/* Handle simple cases with traditional multiplication.

   This is the most critical code of multiplication.  All multiplies rely
   on this, both small and huge.  Small ones arrive here immediately.  Huge
   ones arrive here as this is the base case for Karatsuba's recursive
   algorithm below.  */
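/* Note (added): the two nested loops below perform SIZE * SIZE limb
   multiplies, i.e. classical O(n^2) schoolbook multiplication.  The
   Karatsuba routines below reduce the asymptotic cost to about
   O(n^1.585) by trading one half-size multiply for a few additions.  */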
void
#if __STDC__
____mpn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
____mpn_mul_n_basecase (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb cy_limb;
  mp_limb v_limb;

  /* Multiply by the first limb of V separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration of the outer loop, multiply all of U by one limb
     of V and add the result to PROD at the proper offset.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = __mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}
void
#if __STDC__
____mpn_mul_n (mp_ptr prodp,
               mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
____mpn_mul_n (prodp, up, vp, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
         Multiply the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of U and V
         separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */
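      /* Why this works (note added): with e = SIZE - 1, write
         U = U' + up[e]*B**e and V = V' + vp[e]*B**e, where U' and V' are
         the low e limbs and B = 2**BITS_PER_MP_LIMB.  Then

           U*V = U'*V' + vp[e]*B**e*U' + up[e]*B**e*V.

         The recursive call computes U'*V', the first __mpn_addmul_1 adds
         vp[e]*U' at offset e, and the second adds up[e]*V (all SIZE limbs
         of V, so the up[e]*vp[e]*B**(2e) cross term is included).  */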
      mp_size_t esize = size - 1;   /* even size */
      mp_limb cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = __mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

         Split U in two pieces, U1 and U0, such that
           U = U0 + U1*(B**n),
         and V in V1 and V0, such that
           V = V0 + V1*(B**n).

         UV is then computed recursively using the identity

           UV = (B**(2n) + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1)
                + (B**n + 1)*U0*V0,

         where B = 2**BITS_PER_MP_LIMB.  */
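      /* Expanding the identity (check added, not in the original source):

           (B**(2n) + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1) + (B**n + 1)*U0*V0
         = B**(2n)*U1*V1
           + B**n*(U1*V1 + U1*V0 - U1*V1 - U0*V0 + U0*V1 + U0*V0)
           + U0*V0
         = B**(2n)*U1*V1 + B**n*(U1*V0 + U0*V1) + U0*V0
         = (U0 + U1*B**n) * (V0 + V1*B**n) = U*V,

         using three half-size multiplies (H = U1*V1, M = (U1-U0)*(V0-V1),
         L = U0*V0) instead of four.  */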
      mp_size_t hsize = size >> 1;
      mp_limb cy;
      int negflg;

      /*** Product H.    ________________  ________________
                        |_____U1 x V1____||____U0 x V0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
      /*** Product M.    ________________
                        |_(U1-U0)(V0-V1)_|  */
      if (__mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          __mpn_sub_n (prodp, up + hsize, up, hsize);
          negflg = 0;
        }
      else
        {
          __mpn_sub_n (prodp, up, up + hsize, hsize);
          negflg = 1;
        }
      if (__mpn_cmp (vp + hsize, vp, hsize) >= 0)
        {
          __mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
          negflg ^= 1;
        }
      else
        {
          __mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
          /* No change of NEGFLG.  */
        }
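      /* Note (added): the branches above compute the magnitudes |U1-U0|
         and |V1-V0|; NEGFLG is set exactly when M = (U1-U0)*(V0-V1) is
         negative, in which case the computed magnitude |U1-U0|*|V1-V0|
         must be subtracted rather than added below.  */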
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
      /*** Add product M (if NEGFLG, M is a negative number).  */
      if (negflg)
        cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
        cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      /*** Product L.    ________________  ________________
                        |________________||____U0 x V0_____|  */
      /* Read operands from the low halves of UP and VP.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy product L (twice).  */

      cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
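      /* Note (added): CY has the unsigned type mp_limb, so the test
         CY > 0 below is equivalent to CY != 0 and the else branch can
         never be taken; the abort () is a sanity check for an underflow
         the algorithm should never produce.  */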
      if (cy)
        {
          if (cy > 0)
            __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
          else
            {
              __mpn_sub_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
              abort ();
            }
        }

      MPN_COPY (prodp, tspace, hsize);
      cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        __mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
void
#if __STDC__
____mpn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
#else
____mpn_sqr_n_basecase (prodp, up, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb cy_limb;
  mp_limb v_limb;
  /* Multiply by the first limb of U separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = up[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;
  /* For each iteration of the outer loop, multiply all of U by one of
     its own limbs and add the result to PROD at the proper offset.  */
  for (i = 1; i < size; i++)
    {
      v_limb = up[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = __mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}
void
#if __STDC__
____mpn_sqr_n (mp_ptr prodp,
               mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
____mpn_sqr_n (prodp, up, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
         Square the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of U separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */
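      /* Note (added): this is the same top-limb fixup as in ____mpn_mul_n
         above, specialized to V = U.  */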
      mp_size_t esize = size - 1;   /* even size */
      mp_limb cy_limb;

      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
      cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = __mpn_addmul_1 (prodp + esize, up, size, up[esize]);
      prodp[esize + size] = cy_limb;
    }
  else
    {
      mp_size_t hsize = size >> 1;
      mp_limb cy;
      /*** Product H.    ________________  ________________
                        |_____U1 x U1____||____U0 x U0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
      /*** Product M.    ________________
                        |_(U1-U0)(U0-U1)_|  */
      if (__mpn_cmp (up + hsize, up, hsize) >= 0)
        __mpn_sub_n (prodp, up + hsize, up, hsize);
      else
        __mpn_sub_n (prodp, up, up + hsize, hsize);
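      /* Note (added): no sign flag is needed here, since
         M = (U1-U0)*(U0-U1) = -(U1-U0)**2 is never positive; its
         magnitude (U1-U0)**2 is computed from |U1-U0| and always
         subtracted below.  */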
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
      /*** Add product M.  M is always negative (or zero) here, so its
         magnitude is subtracted unconditionally.  */
      cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      /*** Product L.    ________________  ________________
                        |________________||____U0 x U0_____|  */
      /* Read operands from the low half of UP.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

      /*** Add/copy product L (twice).  */

      cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
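      /* Note (added): as in ____mpn_mul_n above, CY is unsigned, so the
         else branch of the test below is unreachable and the abort ()
         is only a sanity check.  */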
      if (cy)
        {
          if (cy > 0)
            __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
          else
            {
              __mpn_sub_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
              abort ();
            }
        }

      MPN_COPY (prodp, tspace, hsize);
      cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        __mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
/* This should be made into an inline function in gmp.h.  */
inline void
#if __STDC__
__mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
__mpn_mul_n (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  if (up == vp)
    {
      if (size < KARATSUBA_THRESHOLD)
        ____mpn_sqr_n_basecase (prodp, up, size);
      else
        {
          mp_ptr tspace;
          tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
          ____mpn_sqr_n (prodp, up, size, tspace);
        }
    }
  else
    {
      if (size < KARATSUBA_THRESHOLD)
        ____mpn_mul_n_basecase (prodp, up, vp, size);
      else
        {
          mp_ptr tspace;
          tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
          ____mpn_mul_n (prodp, up, vp, size, tspace);
        }
    }
}
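/* Notes added for illustration, not part of the original file:

   Scratch space: 2 * SIZE limbs suffice for the Karatsuba routines above,
   because each level stores the half-size products M and L in the low
   SIZE limbs of TSPACE and hands the high SIZE limbs to its half-size
   recursive calls, which by induction need 2 * (SIZE / 2) limbs.

   Usage sketch (hypothetical operands; limbs are least significant first):

     mp_limb u[4], v[4], prod[8];
     ...fill u[] and v[]...
     __mpn_mul_n (prod, u, v, 4);   -- prod now holds all 8 limbs

   Passing up == vp is permitted and dispatches to the squaring code;
   any other overlap of PRODP with UP or VP is not allowed.  */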