/* Provenance: luatex.git, source/libs/gmp/gmp-src/mpn/generic/sqr_basecase.c
   (blob fc6a043a94669ce5b40bb489c3be0c0bd847119f, beta-0.89.2 snapshot).  */
/* mpn_sqr_basecase -- Internal routine to square a natural number
   of length n.

   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.

Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
37 #include "gmp.h"
38 #include "gmp-impl.h"
39 #include "longlong.h"
/* MPN_SQR_DIAGONAL(rp, up, n): for each i in [0, n), store the square
   of up[i] in the limb pair rp[2*i+1], rp[2*i].  Uses the native
   assembler routine when one exists; otherwise a generic loop built on
   umul_ppmm.  The GMP_NAIL_BITS shifts keep nail builds correct (they
   are no-ops when GMP_NAIL_BITS == 0).  */
#if HAVE_NATIVE_mpn_sqr_diagonal
#define MPN_SQR_DIAGONAL(rp, up, n)					\
  mpn_sqr_diagonal (rp, up, n)
#else
#define MPN_SQR_DIAGONAL(rp, up, n)					\
  do {									\
    mp_size_t _i;							\
    for (_i = 0; _i < (n); _i++)					\
      {									\
	mp_limb_t ul, lpl;						\
	ul = (up)[_i];							\
	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
      }									\
  } while (0)
#endif
/* MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n): write the diagonal squares of
   {up,n} into {rp,2n}, then add 2*{tp,2n-2} in at offset 1 (the cross
   products doubled).  Three implementations, best first: a fused native
   routine, a native addlsh1, or a portable shift-then-add fallback.  */
#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  mpn_sqr_diag_addlsh1 (rp, tp, up, n)
#else
#if HAVE_NATIVE_mpn_addlsh1_n
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  do {									\
    mp_limb_t cy;							\
    MPN_SQR_DIAGONAL (rp, up, n);					\
    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
    rp[2 * n - 1] += cy;						\
  } while (0)
#else
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  do {									\
    mp_limb_t cy;							\
    MPN_SQR_DIAGONAL (rp, up, n);					\
    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
    rp[2 * n - 1] += cy;						\
  } while (0)
#endif
#endif
#undef READY_WITH_mpn_sqr_basecase

/* mpn_sqr_basecase variant built on the native mpn_addmul_2s:
   accumulate the cross products two rows at a time into tp[], then
   combine with the diagonal squares via MPN_SQR_DIAG_ADDLSH1.
   rp must have room for 2*n limbs and must not overlap {up,n}.  */
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      if (n == 1)
	{
	  /* Single limb: square it directly with umul_ppmm.  */
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      MPN_ZERO (tp, n);

      /* Odd n: every cross-product row pairs up evenly, two rows per
	 mpn_addmul_2s call.  */
      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
    }
  else
    {
      if (n == 2)
	{
#if HAVE_NATIVE_mpn_mul_2
	  rp[3] = mpn_mul_2 (rp, up, 2, up);
#else
	  rp[0] = 0;
	  rp[1] = 0;
	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
#endif
	  return;
	}

      MPN_ZERO (tp, n);

      /* Even n: the main loop stops one row-pair early ...  */
      for (i = 0; i <= n - 4; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
      /* ... and the last single row is done with mpn_addmul_1.  */
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;
    }

  MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
}
#define READY_WITH_mpn_sqr_basecase
#endif
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2

/* mpn_sqr_basecase using plain mpn_addmul_2.

   This is tricky, since we have to let mpn_addmul_2 make some undesirable
   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
   This forces us to conditionally add or subtract the mpn_sqr_diagonal
   results.  Examples of the product we form:

   n = 4              n = 5                n = 6
   u1u0 * u3u2u1      u1u0 * u4u3u2u1      u1u0 * u5u4u3u2u1
   u2 * u3            u3u2 * u4u3          u3u2 * u5u4u3
					   u4 * u5
   add: u0 u2 u3      add: u0 u2 u4        add: u0 u2 u4 u5
   sub: u1            sub: u1 u3           sub: u1 u3  */

void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      mp_limb_t x0, x1;

      if (n == 1)
	{
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      /* The code below doesn't like unnormalized operands.  Since such
	 operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
	{
	  rp[2 * n - 2] = 0;
	  rp[2 * n - 1] = 0;
	  mpn_sqr_basecase (rp, up, n - 1);
	  return;
	}

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Negate the diagonal squares at the "sub" positions of the
	 scheme above (one pair every 4 limbs, starting at limb 2),
	 propagating the borrow through the following pair.  */
      for (i = 2;; i += 4)
	{
	  x0 = rp[i + 0];
	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
	  x1 = rp[i + 1];
	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
	  if (i + 4 >= 2 * n)
	    break;
	  mpn_incr_u (rp + i + 4, cy);
	}
    }
  else
    {
      mp_limb_t x0, x1;

      if (n == 2)
	{
#if HAVE_NATIVE_mpn_mul_2
	  rp[3] = mpn_mul_2 (rp, up, 2, up);
#else
	  rp[0] = 0;
	  rp[1] = 0;
	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
#endif
	  return;
	}

      /* The code below doesn't like unnormalized operands.  Since such
	 operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
	{
	  rp[2 * n - 2] = 0;
	  rp[2 * n - 1] = 0;
	  mpn_sqr_basecase (rp, up, n - 1);
	  return;
	}

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 4; i += 2)
	{
	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Same sign fixup as the odd case, but the final borrow must not
	 be propagated past the top, hence the early break and the
	 trailing mpn_decr_u.  */
      for (i = 2;; i += 4)
	{
	  x0 = rp[i + 0];
	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
	  x1 = rp[i + 1];
	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
	  if (i + 6 >= 2 * n)
	    break;
	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
	  mpn_incr_u (rp + i + 4, cy);
	}
      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    }

  /* Fold in the doubled cross products: rp += 2 * tp at offset 1.  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
  rp[2 * n - 1] += cy;
}
#define READY_WITH_mpn_sqr_basecase
#endif
286 #if ! defined (READY_WITH_mpn_sqr_basecase)
288 /* Default mpn_sqr_basecase using mpn_addmul_1. */
290 void
291 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
293 mp_size_t i;
295 ASSERT (n >= 1);
296 ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
299 mp_limb_t ul, lpl;
300 ul = up[0];
301 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
302 rp[0] = lpl >> GMP_NAIL_BITS;
304 if (n > 1)
306 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
307 mp_ptr tp = tarr;
308 mp_limb_t cy;
310 /* must fit 2*n limbs in tarr */
311 ASSERT (n <= SQR_TOOM2_THRESHOLD);
313 cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
314 tp[n - 1] = cy;
315 for (i = 2; i < n; i++)
317 mp_limb_t cy;
318 cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
319 tp[n + i - 2] = cy;
322 MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
325 #endif