/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
   size.  Or more accurately, bn <= an < (3/2)bn.
   Contributed to the GNU project by Torbjorn Granlund.
   Additional improvements by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2008, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
#include "gmp-impl.h"

/* Evaluate in: -1, 0, +1, +2, +inf

  v0  =  a0         * b0          #   A(0)*B(0)
  v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
  vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  bh <= 1
  v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
  vinf=          a2 *         b2  # A(inf)*B(inf)
*/
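
/* Editor's sketch (not from the original source): writing the product as
   W(x) = A(x)*B(x) = w4*x^4 + w3*x^3 + w2*x^2 + w1*x + w0, the five point
   values above determine the coefficients by ordinary linear algebra, e.g.

     w0 = v0
     w4 = vinf
     w2 = (v1 + vm1)/2 - w0 - w4
     t  = (v1 - vm1)/2                       # equals w1 + w3
     w3 = (v2 - w0 - 4*w2 - 16*w4 - 2*t)/6
     w1 = t - w3

   The limb-level, carry-aware version of this recovery is what
   mpn_toom_interpolate_5pts performs at the end of this function; the exact
   operation sequence it uses may differ from the sketch above.  */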
#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_mul_basecase 1
#define MAYBE_mul_toom33 1
#else
#define MAYBE_mul_basecase						\
  (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
#define MAYBE_mul_toom33						\
  (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
#endif
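
/* Editor's note (illustrative, not from the original source): the recursive
   size below is n ~= an/3, and this routine is only reached once the operand
   size passes MUL_TOOM33_THRESHOLD.  So whenever MUL_TOOM33_THRESHOLD >=
   3 * MUL_TOOM22_THRESHOLD, the recursive n can never drop below
   MUL_TOOM22_THRESHOLD, MAYBE_mul_basecase evaluates to 0, and the basecase
   branch of TOOM33_MUL_N_REC is compiled away.  For example, with the
   hypothetical values MUL_TOOM22_THRESHOLD = 30 and MUL_TOOM33_THRESHOLD =
   100, any n reached here is at least (100 + 2)/3 = 34 > 30.
   MAYBE_mul_toom33 is the analogous test one level up, against
   MUL_TOOM44_THRESHOLD.  */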
/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
   multiplication at the infinity point.  We may have
   MAYBE_mul_basecase == 0, and still get s just below
   MUL_TOOM22_THRESHOLD.  If MUL_TOOM33_THRESHOLD == 7, we can even get
   s == 1 and mpn_toom22_mul will crash.
*/
#define TOOM33_MUL_N_REC(p, a, b, n, ws)				\
  do {									\
    if (MAYBE_mul_basecase						\
	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
      mpn_mul_basecase (p, a, n, b, n);					\
    else if (! MAYBE_mul_toom33						\
	     || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
      mpn_toom22_mul (p, a, n, b, n, ws);				\
    else								\
      mpn_toom33_mul (p, a, n, b, n, ws);				\
  } while (0)
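
/* Editor's sketch of how this routine is normally reached (not from the
   original file): callers go through the documented mpn_mul/mpz interfaces,
   which dispatch here once the operand sizes pass MUL_TOOM33_THRESHOLD.  A
   direct internal call would look roughly like the lines below, assuming the
   mpn_toom33_mul_itch macro from gmp-impl.h for the scratch size; treat this
   as an illustration, not a supported entry point.

     mp_ptr scratch = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (an, bn));
     mpn_toom33_mul (pp, ap, an, bp, bn, scratch);
*/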
void
mpn_toom33_mul (mp_ptr pp,
                mp_srcptr ap, mp_size_t an,
                mp_srcptr bp, mp_size_t bn,
                mp_ptr scratch)
{
  const int __gmpn_cpuvec_initialized = 1;
  mp_size_t n, s, t;
  int vm1_neg;
  mp_limb_t cy, vinf0;
  mp_ptr gp;
  mp_ptr as1, asm1, as2;
  mp_ptr bs1, bsm1, bs2;
#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)
  n = (an + 2) / (size_t) 3;

  s = an - 2 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);
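  /* Editor's example (illustrative): an = 100, bn = 70 satisfy
     bn <= an < (3/2)bn and give n = (100 + 2)/3 = 34, s = 100 - 68 = 32,
     t = 70 - 68 = 2, so both tails fit in at most n limbs as asserted.  */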

  as1  = scratch + 4 * n + 4;
  asm1 = scratch + 2 * n + 2;
  as2 = pp + n + 1;

  bs1 = pp;
  bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
  bs2 = pp + 2 * n + 2;

  gp = pp;
  /* Compute as1 and asm1.  */
  cy = mpn_add (gp, a0, n, a2, s);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
      as1[n] = cy >> 1;
      asm1[n] = 0;
      vm1_neg = 1;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
      as1[n] = cy + (cy2 >> 1);
      asm1[n] = cy - (cy2 & 1);
      vm1_neg = 0;
    }
#else
  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      mpn_sub_n (asm1, a1, gp, n);
      asm1[n] = 0;
      vm1_neg = 1;
    }
  else
    {
      cy -= mpn_sub_n (asm1, gp, a1, n);
      asm1[n] = cy;
      vm1_neg = 0;
    }
#endif
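
  /* Compute as2 = A(2) = a0 + 2*a1 + 4*a2.  Editor's note (not in the
     original source): the branches below compute the same value via different
     primitives, using 2*(as1 + a2) - a0 for the rsblsh1 and shift-and-subtract
     paths and a0 + 2*(a1 + 2*a2) for the addlsh1 path; only the carry
     bookkeeping differs.  */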
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (as2, a1, a2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  cy -= mpn_sub_n (as2, as2, a0, n);
#endif
#endif

  as2[n] = cy;

  /* Compute bs1 and bsm1.  */
  cy = mpn_add (gp, b0, n, b2, t);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
      bs1[n] = cy >> 1;
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
      bs1[n] = cy + (cy2 >> 1);
      bsm1[n] = cy - (cy2 & 1);
    }
#else
  bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
    {
      mpn_sub_n (bsm1, b1, gp, n);
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
  else
    {
      cy -= mpn_sub_n (bsm1, gp, b1, n);
      bsm1[n] = cy;
    }
#endif
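
  /* Compute bs2 = B(2) = b0 + 2*b1 + 4*b2, mirroring the as2 computation
     above (editor's note, not in the original source).  */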
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (bs2, b2, bs1, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (bs2, b1, b2, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
  cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
#else
  cy = mpn_add_n (bs2, bs1, b2, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
  cy -= mpn_sub_n (bs2, bs2, b0, n);
#endif
#endif

  bs2[n] = cy;

  ASSERT (as1[n] <= 2);
  ASSERT (bs1[n] <= 2);
  ASSERT (asm1[n] <= 1);
  ASSERT (bsm1[n] <= 1);
  ASSERT (as2[n] <= 6);
  ASSERT (bs2[n] <= 6);

#define v0    pp				/* 2n */
#define v1    (pp + 2 * n)			/* 2n+1 */
#define vinf  (pp + 4 * n)			/* s+t */
#define vm1   scratch				/* 2n+1 */
#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
#define scratch_out  (scratch + 5 * n + 5)
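
/* Editor's note on the layout (not in the original source): v1 occupies
   pp + 2n .. pp + 4n, so its most significant limb lands on vinf[0]; that is
   why vinf[0] is saved into vinf0 below before v1 is computed and passed
   separately to mpn_toom_interpolate_5pts.  Likewise v2 overwrites asm1 and
   part of bsm1 in scratch, which is safe because vm1 has already been
   computed from them by then.  */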

  /* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
  cy = 0;
  if (asm1[n] != 0)
    cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
  if (bsm1[n] != 0)
    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  vm1[2 * n] = cy;
#else
  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);
#endif
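
  /* Editor's note (not in the original source): with SMALLER_RECURSION the
     recursive product above uses only the low n limbs of asm1 and bsm1, so
     the cross terms asm1[n]*{bsm1,n} and bsm1[n]*{asm1,n} plus the product
     asm1[n]*bsm1[n] (each high limb is 0 or 1) still have to be folded into
     vm1's upper half; that is what the conditional mpn_add_n corrections
     above do.  */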
  TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */

  /* vinf, s+t limbs */
  if (s > t)  mpn_mul (vinf, a2, s, b2, t);
  else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);

  vinf0 = vinf[0];				/* v1 overlaps with this */

  /* v1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
  if (as1[n] == 1)
    {
      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
    }
  else if (as1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
#else
      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
#endif
    }
  else
    cy = 0;
  if (bs1[n] == 1)
    {
      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (bs1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
#else
      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  v1[2 * n] = cy;
#else
  cy = vinf[1];
  TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
  vinf[1] = cy;
#endif

  TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */

  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
}