/* __mpn_mul_n -- Multiply two natural numbers of length n.

Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
License for more details.

You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
   always stored.  Return the most significant limb.

   Argument constraints:
   1. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
/* Forward declaration: the Karatsuba code below calls itself recursively
   through MPN_MUL_N_RECURSE, so the definition further down needs this.  */
void
#if __STDC__
____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#else
____mpn_mul_n ();
#endif
51 /* Handle simple cases with traditional multiplication.
53 This is the most critical code of multiplication. All multiplies rely
54 on this, both small and huge. Small ones arrive here immediately. Huge
55 ones arrive here as this is the base case for Karatsuba's recursive
60 ____mpn_mul_n_basecase (mp_ptr prodp
, mp_srcptr up
, mp_srcptr vp
, mp_size_t size
)
62 ____mpn_mul_n_basecase (prodp
, up
, vp
, size
)
73 /* Multiply by the first limb in V separately, as the result can be
74 stored (not added) to PROD. We also avoid a loop for zeroing. */
79 MPN_COPY (prodp
, up
, size
);
81 MPN_ZERO (prodp
, size
);
85 cy_limb
= __mpn_mul_1 (prodp
, up
, size
, v_limb
);
87 prodp
[size
] = cy_limb
;
90 /* For each iteration in the outer loop, multiply one limb from
91 U with one limb from V, and add it to PROD. */
92 for (i
= 1; i
< size
; i
++)
99 cy_limb
= __mpn_add_n (prodp
, prodp
, up
, size
);
102 cy_limb
= __mpn_addmul_1 (prodp
, up
, size
, v_limb
);
104 prodp
[size
] = cy_limb
;
111 ____mpn_mul_n (mp_ptr prodp
,
112 mp_srcptr up
, mp_srcptr vp
, mp_size_t size
, mp_ptr tspace
)
114 ____mpn_mul_n (prodp
, up
, vp
, size
, tspace
)
124 /* The size is odd, the code code below doesn't handle that.
125 Multiply the least significant (size - 1) limbs with a recursive
126 call, and handle the most significant limb of S1 and S2
128 /* A slightly faster way to do this would be to make the Karatsuba
129 code below behave as if the size were even, and let it check for
130 odd size in the end. I.e., in essence move this code to the end.
131 Doing so would save us a recursive call, and potentially make the
132 stack grow a lot less. */
134 mp_size_t esize
= size
- 1; /* even size */
137 MPN_MUL_N_RECURSE (prodp
, up
, vp
, esize
, tspace
);
138 cy_limb
= __mpn_addmul_1 (prodp
+ esize
, up
, esize
, vp
[esize
]);
139 prodp
[esize
+ esize
] = cy_limb
;
140 cy_limb
= __mpn_addmul_1 (prodp
+ esize
, vp
, size
, up
[esize
]);
142 prodp
[esize
+ size
] = cy_limb
;
146 /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
148 Split U in two pieces, U1 and U0, such that
150 and V in V1 and V0, such that
153 UV is then computed recursively using the identity
156 UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
159 Where B = 2**BITS_PER_MP_LIMB. */
161 mp_size_t hsize
= size
>> 1;
165 /*** Product H. ________________ ________________
166 |_____U1 x V1____||____U0 x V0_____| */
167 /* Put result in upper part of PROD and pass low part of TSPACE
169 MPN_MUL_N_RECURSE (prodp
+ size
, up
+ hsize
, vp
+ hsize
, hsize
, tspace
);
171 /*** Product M. ________________
172 |_(U1-U0)(V0-V1)_| */
173 if (__mpn_cmp (up
+ hsize
, up
, hsize
) >= 0)
175 __mpn_sub_n (prodp
, up
+ hsize
, up
, hsize
);
180 __mpn_sub_n (prodp
, up
, up
+ hsize
, hsize
);
183 if (__mpn_cmp (vp
+ hsize
, vp
, hsize
) >= 0)
185 __mpn_sub_n (prodp
+ hsize
, vp
+ hsize
, vp
, hsize
);
190 __mpn_sub_n (prodp
+ hsize
, vp
, vp
+ hsize
, hsize
);
191 /* No change of NEGFLG. */
193 /* Read temporary operands from low part of PROD.
194 Put result in low part of TSPACE using upper part of TSPACE
196 MPN_MUL_N_RECURSE (tspace
, prodp
, prodp
+ hsize
, hsize
, tspace
+ size
);
198 /*** Add/copy product H. */
199 MPN_COPY (prodp
+ hsize
, prodp
+ size
, hsize
);
200 cy
= __mpn_add_n (prodp
+ size
, prodp
+ size
, prodp
+ size
+ hsize
, hsize
);
202 /*** Add product M (if NEGFLG M is a negative number). */
204 cy
-= __mpn_sub_n (prodp
+ hsize
, prodp
+ hsize
, tspace
, size
);
206 cy
+= __mpn_add_n (prodp
+ hsize
, prodp
+ hsize
, tspace
, size
);
208 /*** Product L. ________________ ________________
209 |________________||____U0 x V0_____| */
210 /* Read temporary operands from low part of PROD.
211 Put result in low part of TSPACE using upper part of TSPACE
213 MPN_MUL_N_RECURSE (tspace
, up
, vp
, hsize
, tspace
+ size
);
215 /*** Add/copy Product L (twice). */
217 cy
+= __mpn_add_n (prodp
+ hsize
, prodp
+ hsize
, tspace
, size
);
219 __mpn_add_1 (prodp
+ hsize
+ size
, prodp
+ hsize
+ size
, hsize
, cy
);
221 MPN_COPY (prodp
, tspace
, hsize
);
222 cy
= __mpn_add_n (prodp
+ hsize
, prodp
+ hsize
, tspace
+ hsize
, hsize
);
224 __mpn_add_1 (prodp
+ size
, prodp
+ size
, size
, 1);
230 ____mpn_sqr_n_basecase (mp_ptr prodp
, mp_srcptr up
, mp_size_t size
)
232 ____mpn_sqr_n_basecase (prodp
, up
, size
)
242 /* Multiply by the first limb in V separately, as the result can be
243 stored (not added) to PROD. We also avoid a loop for zeroing. */
248 MPN_COPY (prodp
, up
, size
);
250 MPN_ZERO (prodp
, size
);
254 cy_limb
= __mpn_mul_1 (prodp
, up
, size
, v_limb
);
256 prodp
[size
] = cy_limb
;
259 /* For each iteration in the outer loop, multiply one limb from
260 U with one limb from V, and add it to PROD. */
261 for (i
= 1; i
< size
; i
++)
268 cy_limb
= __mpn_add_n (prodp
, prodp
, up
, size
);
271 cy_limb
= __mpn_addmul_1 (prodp
, up
, size
, v_limb
);
273 prodp
[size
] = cy_limb
;
280 ____mpn_sqr_n (mp_ptr prodp
,
281 mp_srcptr up
, mp_size_t size
, mp_ptr tspace
)
283 ____mpn_sqr_n (prodp
, up
, size
, tspace
)
292 /* The size is odd, the code code below doesn't handle that.
293 Multiply the least significant (size - 1) limbs with a recursive
294 call, and handle the most significant limb of S1 and S2
296 /* A slightly faster way to do this would be to make the Karatsuba
297 code below behave as if the size were even, and let it check for
298 odd size in the end. I.e., in essence move this code to the end.
299 Doing so would save us a recursive call, and potentially make the
300 stack grow a lot less. */
302 mp_size_t esize
= size
- 1; /* even size */
305 MPN_SQR_N_RECURSE (prodp
, up
, esize
, tspace
);
306 cy_limb
= __mpn_addmul_1 (prodp
+ esize
, up
, esize
, up
[esize
]);
307 prodp
[esize
+ esize
] = cy_limb
;
308 cy_limb
= __mpn_addmul_1 (prodp
+ esize
, up
, size
, up
[esize
]);
310 prodp
[esize
+ size
] = cy_limb
;
314 mp_size_t hsize
= size
>> 1;
317 /*** Product H. ________________ ________________
318 |_____U1 x U1____||____U0 x U0_____| */
319 /* Put result in upper part of PROD and pass low part of TSPACE
321 MPN_SQR_N_RECURSE (prodp
+ size
, up
+ hsize
, hsize
, tspace
);
323 /*** Product M. ________________
324 |_(U1-U0)(U0-U1)_| */
325 if (__mpn_cmp (up
+ hsize
, up
, hsize
) >= 0)
327 __mpn_sub_n (prodp
, up
+ hsize
, up
, hsize
);
331 __mpn_sub_n (prodp
, up
, up
+ hsize
, hsize
);
334 /* Read temporary operands from low part of PROD.
335 Put result in low part of TSPACE using upper part of TSPACE
337 MPN_SQR_N_RECURSE (tspace
, prodp
, hsize
, tspace
+ size
);
339 /*** Add/copy product H. */
340 MPN_COPY (prodp
+ hsize
, prodp
+ size
, hsize
);
341 cy
= __mpn_add_n (prodp
+ size
, prodp
+ size
, prodp
+ size
+ hsize
, hsize
);
343 /*** Add product M (if NEGFLG M is a negative number). */
344 cy
-= __mpn_sub_n (prodp
+ hsize
, prodp
+ hsize
, tspace
, size
);
346 /*** Product L. ________________ ________________
347 |________________||____U0 x U0_____| */
348 /* Read temporary operands from low part of PROD.
349 Put result in low part of TSPACE using upper part of TSPACE
351 MPN_SQR_N_RECURSE (tspace
, up
, hsize
, tspace
+ size
);
353 /*** Add/copy Product L (twice). */
355 cy
+= __mpn_add_n (prodp
+ hsize
, prodp
+ hsize
, tspace
, size
);
357 __mpn_add_1 (prodp
+ hsize
+ size
, prodp
+ hsize
+ size
, hsize
, cy
);
359 MPN_COPY (prodp
, tspace
, hsize
);
360 cy
= __mpn_add_n (prodp
+ hsize
, prodp
+ hsize
, tspace
+ hsize
, hsize
);
362 __mpn_add_1 (prodp
+ size
, prodp
+ size
, size
, 1);
366 /* This should be made into an inline function in gmp.h. */
369 __mpn_mul_n (mp_ptr prodp
, mp_srcptr up
, mp_srcptr vp
, mp_size_t size
)
371 __mpn_mul_n (prodp
, up
, vp
, size
)
380 if (size
< KARATSUBA_THRESHOLD
)
382 ____mpn_sqr_n_basecase (prodp
, up
, size
);
387 tspace
= (mp_ptr
) alloca (2 * size
* BYTES_PER_MP_LIMB
);
388 ____mpn_sqr_n (prodp
, up
, size
, tspace
);
393 if (size
< KARATSUBA_THRESHOLD
)
395 ____mpn_mul_n_basecase (prodp
, up
, vp
, size
);
400 tspace
= (mp_ptr
) alloca (2 * size
* BYTES_PER_MP_LIMB
);
401 ____mpn_mul_n (prodp
, up
, vp
, size
, tspace
);