beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / generic / toom8_sqr.c
blob0c93678815946275f5edcdf3eabeee373a50d4ab
1 /* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
3 Contributed to the GNU project by Marco Bodrato.
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 Copyright 2009, 2012 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
26 or both in parallel, as here.
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
38 #include "gmp.h"
39 #include "gmp-impl.h"
41 #if GMP_NUMB_BITS < 29
42 #error Not implemented.
43 #endif
45 #if GMP_NUMB_BITS < 43
46 #define BIT_CORRECTION 1
47 #define CORRECTION_BITS GMP_NUMB_BITS
48 #else
49 #define BIT_CORRECTION 0
50 #define CORRECTION_BITS 0
51 #endif
53 #ifndef SQR_TOOM8_THRESHOLD
54 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
55 #endif
57 #ifndef SQR_TOOM6_THRESHOLD
58 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
59 #endif
61 #if TUNE_PROGRAM_BUILD
62 #define MAYBE_sqr_basecase 1
63 #define MAYBE_sqr_above_basecase 1
64 #define MAYBE_sqr_toom2 1
65 #define MAYBE_sqr_above_toom2 1
66 #define MAYBE_sqr_toom3 1
67 #define MAYBE_sqr_above_toom3 1
68 #define MAYBE_sqr_toom4 1
69 #define MAYBE_sqr_above_toom4 1
70 #define MAYBE_sqr_above_toom6 1
71 #else
72 #define SQR_TOOM8_MAX \
73 ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ? \
74 ((SQR_FFT_THRESHOLD+8*2-1+7)/8) \
75 : MP_SIZE_T_MAX )
76 #define MAYBE_sqr_basecase \
77 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
78 #define MAYBE_sqr_above_basecase \
79 (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
80 #define MAYBE_sqr_toom2 \
81 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
82 #define MAYBE_sqr_above_toom2 \
83 (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
84 #define MAYBE_sqr_toom3 \
85 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
86 #define MAYBE_sqr_above_toom3 \
87 (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
88 #define MAYBE_sqr_toom4 \
89 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
90 #define MAYBE_sqr_above_toom4 \
91 (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
92 #define MAYBE_sqr_above_toom6 \
93 (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
94 #endif
96 #define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws) \
97 do { \
98 if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
99 || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) { \
100 mpn_sqr_basecase (p, a, n); \
101 if (f) mpn_sqr_basecase (p2, a2, n); \
102 } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
103 || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) { \
104 mpn_toom2_sqr (p, a, n, ws); \
105 if (f) mpn_toom2_sqr (p2, a2, n, ws); \
106 } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
107 || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) { \
108 mpn_toom3_sqr (p, a, n, ws); \
109 if (f) mpn_toom3_sqr (p2, a2, n, ws); \
110 } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4 \
111 || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) { \
112 mpn_toom4_sqr (p, a, n, ws); \
113 if (f) mpn_toom4_sqr (p2, a2, n, ws); \
114 } else if (! MAYBE_sqr_above_toom6 \
115 || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) { \
116 mpn_toom6_sqr (p, a, n, ws); \
117 if (f) mpn_toom6_sqr (p2, a2, n, ws); \
118 } else { \
119 mpn_toom8_sqr (p, a, n, ws); \
120 if (f) mpn_toom8_sqr (p2, a2, n, ws); \
122 } while (0)
124 void
125 mpn_toom8_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
127 mp_size_t n, s;
129 /***************************** decomposition *******************************/
131 ASSERT ( an >= 40 );
133 n = 1 + ((an - 1)>>3);
135 s = an - 7 * n;
137 ASSERT (0 < s && s <= n);
138 ASSERT ( s + s > 3 );
140 #define r6 (pp + 3 * n) /* 3n+1 */
141 #define r4 (pp + 7 * n) /* 3n+1 */
142 #define r2 (pp +11 * n) /* 3n+1 */
143 #define r0 (pp +15 * n) /* s+t <= 2*n */
144 #define r7 (scratch) /* 3n+1 */
145 #define r5 (scratch + 3 * n + 1) /* 3n+1 */
146 #define r3 (scratch + 6 * n + 2) /* 3n+1 */
147 #define r1 (scratch + 9 * n + 3) /* 3n+1 */
148 #define v0 (pp +11 * n) /* n+1 */
149 #define v2 (pp +13 * n+2) /* n+1 */
150 #define wse (scratch +12 * n + 4) /* 3n+1 */
152 /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
153 need all of them, when DO_mpn_sublsh_n usea a scratch */
154 /* if (scratch == NULL) */
155 /* scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
157 /********************** evaluation and recursive calls *********************/
158 /* $\pm1/8$ */
159 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
160 /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
161 TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
162 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
164 /* $\pm1/4$ */
165 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
166 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
167 TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
168 mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
170 /* $\pm2$ */
171 mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
172 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
173 TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
174 mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
176 /* $\pm8$ */
177 mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
178 /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
179 TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
180 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
182 /* $\pm1/2$ */
183 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
184 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
185 TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
186 mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
188 /* $\pm1$ */
189 mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s, pp);
190 /* A(-1)*B(-1) */ /* A(1)*B(1) */
191 TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
192 mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
194 /* $\pm4$ */
195 mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
196 /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
197 TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
198 mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
200 #undef v0
201 #undef v2
203 /* A(0)*B(0) */
204 TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
206 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
208 #undef r0
209 #undef r1
210 #undef r2
211 #undef r3
212 #undef r4
213 #undef r5
214 #undef r6
215 #undef wse
219 #undef TOOM8_SQR_REC
220 #undef MAYBE_sqr_basecase
221 #undef MAYBE_sqr_above_basecase
222 #undef MAYBE_sqr_toom2
223 #undef MAYBE_sqr_above_toom2
224 #undef MAYBE_sqr_toom3
225 #undef MAYBE_sqr_above_toom3
226 #undef MAYBE_sqr_above_toom4