1 /* mpn_sqr_basecase -- Internal routine to square a natural number
4 THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
8 Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011 Free Software
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
26 or both in parallel, as here.
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
/* MPN_SQR_DIAGONAL(rp, up, n): write the double-limb square u[i]^2 of
   each limb to rp[2i], rp[2i+1].  Uses the native assembly routine
   mpn_sqr_diagonal when available, otherwise an umul_ppmm loop; the
   GMP_NAIL_BITS shifts renormalize the low product half in nails builds.
   NOTE(review): this chunk is a garbled extraction -- the #else
   separating the two definitions, the do { ... } while (0) wrapper, the
   local declarations (_i, ul, lpl), the `ul = (up)[_i];` load, the loop
   braces, and the closing #endif are missing; restore from the upstream
   file before compiling.  The stray integer at the start of each line is
   the upstream file's own line number, fused in by the extraction. */
42 #if HAVE_NATIVE_mpn_sqr_diagonal
43 #define MPN_SQR_DIAGONAL(rp, up, n) \
44 mpn_sqr_diagonal (rp, up, n)
46 #define MPN_SQR_DIAGONAL(rp, up, n) \
49 for (_i = 0; _i < (n); _i++) \
53 umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
54 (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
/* MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n): form the final square from the
   accumulated cross products.  rp gets the diagonal squares (via
   MPN_SQR_DIAGONAL), then tp -- which holds sum of u[i]*u[j] with
   i != j counted once -- is doubled and added in at a one-limb offset,
   since (sum u[i])^2 = diagonal squares + 2 * cross products.
   Three implementations: native assembly; addlsh1_n-based (add with
   left shift by 1 in one pass); portable mpn_lshift + mpn_add_n.
   NOTE(review): garbled extraction -- the #else directives, the
   do { ... } while (0) wrappers, the `mp_limb_t cy;` declarations, and
   the closing #endif pair are missing; restore from the upstream file.
   The stray leading integers are the upstream file's own line numbers
   fused in by the extraction. */
59 #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
60 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
61 mpn_sqr_diag_addlsh1 (rp, tp, up, n)
63 #if HAVE_NATIVE_mpn_addlsh1_n
64 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
67 MPN_SQR_DIAGONAL (rp, up, n); \
68 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
69 rp[2 * n - 1] += cy; \
72 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
75 MPN_SQR_DIAGONAL (rp, up, n); \
76 cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
77 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
78 rp[2 * n - 1] += cy; \
84 #undef READY_WITH_mpn_sqr_basecase
87 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
/* mpn_sqr_basecase, mpn_addmul_2s variant: square {up,n} into {rp,2n}.
   Off-diagonal products u[i]*u[j] (i < j) are accumulated into the
   scratch array two rows per iteration with mpn_addmul_2s, then the
   diagonal squares are folded in, together with a doubling of the
   cross products, by MPN_SQR_DIAG_ADDLSH1.
   NOTE(review): this chunk is a garbled extraction -- the `void`
   return type, function braces, local declarations (tp, cy, i, ul,
   lpl), the even/odd-n branch structure, the MPN_ZERO of the scratch
   area, the per-iteration carry stores, the early `return`s, and the
   matching #else/#endif directives are all missing; restore from the
   upstream file before compiling.  The stray integer at the start of
   many lines is the upstream file's own line number, fused into the
   text by the extraction. */
89 mpn_sqr_basecase (mp_ptr rp
, mp_srcptr up
, mp_size_t n
)
/* Scratch for the off-diagonal partial products (2*n limbs used). */
92 mp_limb_t tarr
[2 * SQR_TOOM2_THRESHOLD
];
96 /* must fit 2*n limbs in tarr */
97 ASSERT (n
<= SQR_TOOM2_THRESHOLD
);
/* n == 1 case: a single umul_ppmm computes up[0]^2 directly; the
   GMP_NAIL_BITS shifts renormalize the low half in nails builds. */
105 umul_ppmm (rp
[1], lpl
, ul
, ul
<< GMP_NAIL_BITS
);
106 rp
[0] = lpl
>> GMP_NAIL_BITS
;
/* Presumably the odd-n leg (the guarding branch is missing here):
   accumulate off-diagonal products two rows per iteration. */
112 for (i
= 0; i
<= n
- 2; i
+= 2)
114 cy
= mpn_addmul_2s (tp
+ 2 * i
, up
+ i
+ 1, n
- (i
+ 1), up
+ i
);
/* n == 2 case: one mul_2 (or addmul_2 on zeroed rp -- the zeroing
   lines are missing here) forms the full 4-limb square in rp. */
122 #if HAVE_NATIVE_mpn_mul_2
123 rp
[3] = mpn_mul_2 (rp
, up
, 2, up
);
127 rp
[3] = mpn_addmul_2 (rp
, up
, 2, up
);
/* Presumably the even-n leg: same accumulation loop, stopping two
   rows early... */
134 for (i
= 0; i
<= n
- 4; i
+= 2)
136 cy
= mpn_addmul_2s (tp
+ 2 * i
, up
+ i
+ 1, n
- (i
+ 1), up
+ i
);
/* ...then the last single cross product u[n-1]*u[n-2]. */
139 cy
= mpn_addmul_1 (tp
+ 2 * n
- 4, up
+ n
- 1, 1, up
[n
- 2]);
/* Finale: rp = diagonal squares + 2 * accumulated cross products. */
143 MPN_SQR_DIAG_ADDLSH1 (rp
, tp
, up
, n
);
145 #define READY_WITH_mpn_sqr_basecase
149 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
151 /* mpn_sqr_basecase using plain mpn_addmul_2.
153 This is tricky, since we have to let mpn_addmul_2 make some undesirable
154 multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
155 This forces us to conditionally add or subtract the mpn_sqr_diagonal
156 results. Examples of the product we form:
159 u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
160 u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3
162 add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5
163 sub: u1 sub: u1 u3 sub: u1 u3
/* NOTE(review): this chunk is a garbled extraction -- the `void`
   return type, function braces, local declarations (tp, cy, i, ul,
   lpl, x0, x1), the even/odd-n branch structure, the normalization
   test that guards the "dumb recursion", the loads that set x0/x1,
   the carry stores, and the matching #else/#endif directives are all
   missing; restore from the upstream file before compiling.  The
   stray leading integers are the upstream file's own line numbers,
   fused in by the extraction. */
167 mpn_sqr_basecase (mp_ptr rp
, mp_srcptr up
, mp_size_t n
)
/* Scratch for the off-diagonal partial products. */
170 mp_limb_t tarr
[2 * SQR_TOOM2_THRESHOLD
];
174 /* must fit 2*n limbs in tarr */
175 ASSERT (n
<= SQR_TOOM2_THRESHOLD
);
/* n == 1 case: single square via umul_ppmm (nails-aware shifts). */
185 umul_ppmm (rp
[1], lpl
, ul
, ul
<< GMP_NAIL_BITS
);
186 rp
[0] = lpl
>> GMP_NAIL_BITS
;
190 /* The code below doesn't like unnormalized operands. Since such
191 operands are unusual, handle them with a dumb recursion. */
/* Square the low n-1 limbs recursively; the surrounding fix-up code
   for the top limb is missing from this extraction. */
196 mpn_sqr_basecase (rp
, up
, n
- 1);
/* Accumulate cross products two rows at a time; mpn_addmul_2 also
   forms some unwanted u[k]*u[k] terms (see header comment above). */
202 for (i
= 0; i
<= n
- 2; i
+= 2)
204 cy
= mpn_addmul_2 (tp
+ 2 * i
, up
+ i
+ 1, n
- (i
+ 1), up
+ i
);
/* Diagonal squares written unconditionally... */
208 MPN_SQR_DIAGONAL (rp
, up
, n
);
/* ...then the "sub" limbs are compensated: x0/x1 presumably hold the
   diagonal contribution to cancel (their loads are missing here);
   a two's-complement negate with borrow propagation follows. */
213 rp
[i
+ 0] = (-x0
) & GMP_NUMB_MASK
;
215 rp
[i
+ 1] = (-x1
- (x0
!= 0)) & GMP_NUMB_MASK
;
216 __GMPN_SUB_1 (cy
, rp
+ i
+ 2, rp
+ i
+ 2, 2, (x1
| x0
) != 0);
219 mpn_incr_u (rp
+ i
+ 4, cy
);
/* n == 2 case: one mul_2 (or addmul_2 on zeroed rp -- zeroing lines
   missing here) forms the 4-limb square directly. */
228 #if HAVE_NATIVE_mpn_mul_2
229 rp
[3] = mpn_mul_2 (rp
, up
, 2, up
);
233 rp
[3] = mpn_addmul_2 (rp
, up
, 2, up
);
238 /* The code below doesn't like unnormalized operands. Since such
239 operands are unusual, handle them with a dumb recursion. */
244 mpn_sqr_basecase (rp
, up
, n
- 1);
/* Presumably the even-n leg: same accumulation, stopping two rows
   early, then the last cross product u[n-1]*u[n-2]. */
250 for (i
= 0; i
<= n
- 4; i
+= 2)
252 cy
= mpn_addmul_2 (tp
+ 2 * i
, up
+ i
+ 1, n
- (i
+ 1), up
+ i
);
255 cy
= mpn_addmul_1 (tp
+ 2 * n
- 4, up
+ n
- 1, 1, up
[n
- 2]);
258 MPN_SQR_DIAGONAL (rp
, up
, n
);
/* Same conditional compensation of unwanted diagonal terms as above. */
263 rp
[i
+ 0] = (-x0
) & GMP_NUMB_MASK
;
265 rp
[i
+ 1] = (-x1
- (x0
!= 0)) & GMP_NUMB_MASK
;
268 __GMPN_SUB_1 (cy
, rp
+ i
+ 2, rp
+ i
+ 2, 2, (x1
| x0
) != 0);
269 mpn_incr_u (rp
+ i
+ 4, cy
);
271 mpn_decr_u (rp
+ i
+ 2, (x1
| x0
) != 0);
/* Finale, inlined rather than via MPN_SQR_DIAG_ADDLSH1 (the diagonal
   was already handled above): rp += 2*tp, carry into the top limb
   (the `rp[2*n-1] += cy;` line appears to be missing here). */
274 #if HAVE_NATIVE_mpn_addlsh1_n
275 cy
= mpn_addlsh1_n (rp
+ 1, rp
+ 1, tp
, 2 * n
- 2);
277 cy
= mpn_lshift (tp
, tp
, 2 * n
- 2, 1);
278 cy
+= mpn_add_n (rp
+ 1, rp
+ 1, tp
, 2 * n
- 2);
282 #define READY_WITH_mpn_sqr_basecase
286 #if ! defined (READY_WITH_mpn_sqr_basecase)
288 /* Default mpn_sqr_basecase using mpn_addmul_1. */
/* Portable fallback: tp accumulates the off-diagonal products
   u[i]*u[j] (i < j) one row at a time with mpn_mul_1/mpn_addmul_1;
   MPN_SQR_DIAG_ADDLSH1 then forms rp = diagonal squares + 2*tp.
   NOTE(review): garbled extraction -- the `void` return type, braces,
   local declarations (tp, cy, i, ul, lpl), the n == 1 early return,
   the carry store after each row, and the closing #endif are missing;
   restore from the upstream file before compiling.  The stray leading
   integers are the upstream file's own line numbers, fused in by the
   extraction. */
291 mpn_sqr_basecase (mp_ptr rp
, mp_srcptr up
, mp_size_t n
)
/* The 2n-limb product area must not overlap the n-limb input. */
296 ASSERT (! MPN_OVERLAP_P (rp
, 2*n
, up
, n
));
/* n == 1 case: single square via umul_ppmm (nails-aware shifts). */
301 umul_ppmm (rp
[1], lpl
, ul
, ul
<< GMP_NAIL_BITS
);
302 rp
[0] = lpl
>> GMP_NAIL_BITS
;
/* Scratch for the off-diagonal partial products (2*n limbs used). */
306 mp_limb_t tarr
[2 * SQR_TOOM2_THRESHOLD
];
310 /* must fit 2*n limbs in tarr */
311 ASSERT (n
<= SQR_TOOM2_THRESHOLD
);
/* First row initializes tp: u[1..n-1] * u[0]. */
313 cy
= mpn_mul_1 (tp
, up
+ 1, n
- 1, up
[0]);
/* Remaining rows: u[i..n-1] * u[i-1], accumulated at offset 2*i-2
   so each row lands one limb-pair further along. */
315 for (i
= 2; i
< n
; i
++)
318 cy
= mpn_addmul_1 (tp
+ 2 * i
- 2, up
+ i
, n
- i
, up
[i
- 1]);
/* Finale: rp = diagonal squares + 2 * accumulated cross products. */
322 MPN_SQR_DIAG_ADDLSH1 (rp
, tp
, up
, n
);