/* mpn_toom53_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/3
   times as large as bn.  Or more accurately, (4/3)bn < an < (5/2)bn.

   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

   The idea of applying toom to unbalanced multiplication is due to Marco
   Bodrato and Alberto Zanoni.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2008, 2012, 2014 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf

  <-s-><--n--><--n--><--n--><--n-->
   ___ ______ ______ ______ ______
  |a4_|___a3_|___a2_|___a1_|___a0_|
	      |_b2_|___b1_|___b0_|
	      <-t--><--n--><--n-->

  v0  =    a0                  *   b0          #    A(0)*B(0)
  v1  = (  a0+ a1+ a2+ a3+ a4) * ( b0+ b1+ b2) #    A(1)*B(1)     ah  <= 4   bh <= 2
  vm1 = (  a0- a1+ a2- a3+ a4) * ( b0- b1+ b2) #   A(-1)*B(-1)   |ah| <= 2   bh <= 1
  v2  = (  a0+2a1+4a2+8a3+16a4)* ( b0+2b1+4b2) #    A(2)*B(2)     ah  <= 30  bh <= 6
  vm2 = (  a0-2a1+4a2-8a3+16a4)* ( b0-2b1+4b2) #   A(-2)*B(-2)  -9<=ah<=20 -1<=bh<=4
  vh  = (16a0+8a1+4a2+2a3+ a4) * (4b0+2b1+ b2) #  A(1/2)*B(1/2)   ah  <= 30  bh <= 6
  vinf=                     a4 *           b2  #  A(inf)*B(inf)
*/
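/* Illustrative sketch, not part of the library: a tiny self-check of the
   nested-doubling identities behind v2 and vh above, using plain word
   arithmetic on toy coefficients.  The guard macro TOOM53_EVAL_EXAMPLE is
   hypothetical and is never defined by a GMP build.  */
#ifdef TOOM53_EVAL_EXAMPLE
#include <assert.h>
static void
toom53_eval_example (void)
{
  unsigned long a[5] = { 1, 2, 3, 4, 5 };	/* toy coefficients of A(x) */
  unsigned long direct, horner, half, rev;

  /* A(2) computed directly and by nested doubling, as done for as2/v2.  */
  direct = a[0] + 2*a[1] + 4*a[2] + 8*a[3] + 16*a[4];
  horner = 2*(2*(2*(2*a[4] + a[3]) + a[2]) + a[1]) + a[0];
  assert (direct == horner);

  /* 2^4 * A(1/2) is A with its coefficients reversed, evaluated at 2;
     this is the ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4 operand of vh.  */
  half = 16*a[0] + 8*a[1] + 4*a[2] + 2*a[3] + a[4];
  rev  = 2*(2*(2*(2*a[0] + a[1]) + a[2]) + a[3]) + a[4];
  assert (half == rev);
}
#endif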
void
mpn_toom53_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t n, s, t;
  mp_limb_t cy;
  mp_ptr gp;
  mp_ptr as1, asm1, as2, asm2, ash;
  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
  mp_ptr tmp;
  enum toom7_flags flags;
  TMP_DECL;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define a3  (ap + 3*n)
#define a4  (ap + 4*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)
  /* n = ceil(max(an/5, bn/3)): a is split into four n-limb pieces plus a
     short top piece of s limbs, b into two n-limb pieces plus t limbs.  */
  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);

  s = an - 4 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);
  TMP_MARK;

  tmp = TMP_ALLOC_LIMBS (10 * (n + 1));
  as1  = tmp; tmp += n + 1;
  asm1 = tmp; tmp += n + 1;
  as2  = tmp; tmp += n + 1;
  asm2 = tmp; tmp += n + 1;
  ash  = tmp; tmp += n + 1;
  bs1  = tmp; tmp += n + 1;
  bsm1 = tmp; tmp += n + 1;
  bs2  = tmp; tmp += n + 1;
  bsm2 = tmp; tmp += n + 1;
  bsh  = tmp; tmp += n + 1;

  gp = pp;	/* use the result area as temporary space for now */
  /* Compute as1 and asm1.  */
  flags = (enum toom7_flags) (toom7_w3_neg
			      & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));

  /* Compute as2 and asm2.  */
  flags = (enum toom7_flags) (flags
			      | (toom7_w1_neg
				 & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp)));
  /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
     = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (ash, a1, a0, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
  if (s < n)
    {
      mp_limb_t cy2;
      cy2 = mpn_addlsh1_n (ash, a4, ash, s);
      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
      MPN_INCR_U (ash + s, n+1-s, cy2);
    }
  else
    ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
#else
  cy = mpn_lshift (ash, a0, n, 1);
  cy += mpn_add_n (ash, ash, a1, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a2, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a3, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  ash[n] = cy + mpn_add (ash, ash, n, a4, s);
#endif
  /* Compute bs1 and bsm1.  */
  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
      bsm1[n] = 0;
      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
    }
  else
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, bs1, b1, n);
      bsm1[n] = bs1[n] - (cy & 1);
      bs1[n] += cy >> 1;
    }
#else
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      mpn_sub_n (bsm1, b1, bs1, n);
      bsm1[n] = 0;
      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
    }
  else
    {
      bsm1[n] = bs1[n] - mpn_sub_n (bsm1, bs1, b1, n);
    }
  bs1[n] += mpn_add_n (bs1, bs1, b1, n);	/* b0+b1+b2 */
#endif
  /* Compute bs2 and bsm2.  */
#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
#if HAVE_NATIVE_mpn_addlsh2_n
  cy = mpn_addlsh2_n (bs2, b0, b2, t);
#else /* HAVE_NATIVE_mpn_addlsh_n */
  cy = mpn_addlsh_n (bs2, b0, b2, t, 2);
#endif
  if (n - t != 0)
    cy = mpn_add_1 (bs2 + t, b0 + t, n - t, cy);
  bs2[n] = cy;
#else
  cy = mpn_lshift (gp, b2, t, 2);
  bs2[n] = mpn_add (bs2, b0, n, gp, t);
  MPN_INCR_U (bs2 + t, n+1-t, cy);
#endif

  gp[n] = mpn_lshift (gp, b1, n, 1);

#if HAVE_NATIVE_mpn_add_n_sub_n
  if (mpn_cmp (bs2, gp, n+1) < 0)
    {
      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
    }
  else
    {
      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, bs2, gp, n+1));
    }
#else
  if (mpn_cmp (bs2, gp, n+1) < 0)
    {
      ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
    }
  else
    {
      ASSERT_NOCARRY (mpn_sub_n (bsm2, bs2, gp, n+1));
    }
  mpn_add_n (bs2, bs2, gp, n+1);
#endif
  /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1) + b2.  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (bsh, b1, b0, n);
  if (t < n)
    {
      mp_limb_t cy2;
      cy2 = mpn_addlsh1_n (bsh, b2, bsh, t);
      bsh[n] = 2*cy + mpn_lshift (bsh + t, bsh + t, n - t, 1);
      MPN_INCR_U (bsh + t, n+1-t, cy2);
    }
  else
    bsh[n] = 2*cy + mpn_addlsh1_n (bsh, b2, bsh, n);
#else
  cy = mpn_lshift (bsh, b0, n, 1);
  cy += mpn_add_n (bsh, bsh, b1, n);
  cy = 2*cy + mpn_lshift (bsh, bsh, n, 1);
  bsh[n] = cy + mpn_add (bsh, bsh, n, b2, t);
#endif
  ASSERT (as1[n] <= 4);
  ASSERT (bs1[n] <= 2);
  ASSERT (asm1[n] <= 2);
  ASSERT (bsm1[n] <= 1);
  ASSERT (as2[n] <= 30);
  ASSERT (bs2[n] <= 6);
  ASSERT (asm2[n] <= 20);
  ASSERT (bsm2[n] <= 4);
  ASSERT (ash[n] <= 30);
  ASSERT (bsh[n] <= 6);
#define v0    pp				/* 2n */
#define v1    (pp + 2 * n)			/* 2n+1 */
#define vinf  (pp + 6 * n)			/* s+t */
#define v2    scratch				/* 2n+1 */
#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
#define scratch_out (scratch + 8 * n + 4)	/* 2n+1 */
	 /* Total scratch need: 10*n+5 */
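  /* The products v2, vm2, vh and vm1 are carved out of scratch, 2n+1 limbs
     each, with scratch_out left over as workspace for the interpolation;
     v0, v1 and vinf live in the result area pp and are combined in place
     by mpn_toom_interpolate_7pts below.  */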
  /* Must be in allocation order, as they overwrite one limb beyond
     2n+1.  */
  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */
  /* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  /* Multiply the low n limbs, then patch in the cross terms from the small
     top limbs asm1[n] and bsm1[n].  */
  mpn_mul_n (vm1, asm1, bsm1, n);
  if (asm1[n] == 1)
    {
      cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
    }
  else if (asm1[n] == 2)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * bsm1[n] + mpn_addlsh1_n (vm1 + n, vm1 + n, bsm1, n);
#else
      cy = 2 * bsm1[n] + mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
#endif
    }
  else
    cy = 0;
  if (bsm1[n] != 0)
    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  vm1[2 * n] = cy;
#else /* SMALLER_RECURSION */
  vm1[2 * n] = 0;
  mpn_mul_n (vm1, asm1, bsm1, n + ((asm1[n] | bsm1[n]) != 0));
#endif /* SMALLER_RECURSION */
  /* v1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  mpn_mul_n (v1, as1, bs1, n);
  if (as1[n] == 1)
    {
      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
    }
  else if (as1[n] == 2)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
#else
      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
#endif
    }
  else if (as1[n] != 0)
    {
      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
    }
  else
    cy = 0;
  if (bs1[n] == 1)
    {
      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (bs1[n] == 2)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
#else
      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  v1[2 * n] = cy;
#else /* SMALLER_RECURSION */
  v1[2 * n] = 0;
  mpn_mul_n (v1, as1, bs1, n + ((as1[n] | bs1[n]) != 0));
#endif /* SMALLER_RECURSION */
  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */
  /* vinf, s+t limbs */
  if (s > t)  mpn_mul (vinf, a4, s, b2, t);
  else        mpn_mul (vinf, b2, t, a4, s);
  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t,
			     scratch_out);

  TMP_FREE;
}
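/* Illustrative sketch, not part of the library: driving this internal
   routine directly, roughly the way GMP's own tests do.  It assumes an
   internal build (gmp-impl.h in scope) and uses mpn_toom53_mul_itch to
   size the scratch space.  Normal applications should call mpn_mul, which
   selects this algorithm automatically for suitable operand sizes.  The
   guard macro TOOM53_MUL_EXAMPLE is hypothetical.  */
#ifdef TOOM53_MUL_EXAMPLE
static void
toom53_mul_example (void)
{
  mp_size_t an = 10, bn = 6;	/* satisfies (4/3)bn < an < (5/2)bn */
  mp_limb_t ap[10], bp[6], pp[10 + 6];
  mp_ptr scratch;
  TMP_DECL;

  mpn_random (ap, an);
  mpn_random (bp, bn);

  TMP_MARK;
  scratch = TMP_ALLOC_LIMBS (mpn_toom53_mul_itch (an, bn));
  mpn_toom53_mul (pp, ap, an, bp, bn, scratch);	/* {pp, an+bn} = a * b */
  TMP_FREE;
}
#endif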