1 /* mpn_sqrtrem -- square root and remainder
3 Contributed to the GNU project by Paul Zimmermann (most code),
4 Torbjorn Granlund (mpn_sqrtrem1) and Marco Bodrato (mpn_dc_sqrt).
6 THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH A
7 MUTABLE INTERFACE. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED
8 INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR
9 DISAPPEAR IN A FUTURE GMP RELEASE.
11 Copyright 1999-2002, 2004, 2005, 2008, 2010, 2012, 2015 Free Software
14 This file is part of the GNU MP Library.
16 The GNU MP Library is free software; you can redistribute it and/or modify
17 it under the terms of either:
19 * the GNU Lesser General Public License as published by the Free
20 Software Foundation; either version 3 of the License, or (at your
21 option) any later version.
25 * the GNU General Public License as published by the Free Software
26 Foundation; either version 2 of the License, or (at your option) any
29 or both in parallel, as here.
31 The GNU MP Library is distributed in the hope that it will be useful, but
32 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
33 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
36 You should have received copies of the GNU General Public License and the
37 GNU Lesser General Public License along with the GNU MP Library. If not,
38 see https://www.gnu.org/licenses/. */
41 /* See "Karatsuba Square Root", reference in gmp.texi. */
50 #define USE_DIVAPPR_Q 1
53 static const unsigned char invsqrttab
[384] = /* The common 0x100 was removed */
55 0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
56 0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
57 0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
58 0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
59 0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
60 0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
61 0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
62 0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
63 0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
64 0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
65 0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
66 0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
67 0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
68 0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
69 0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
70 0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
71 0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
72 0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
73 0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
74 0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
75 0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
76 0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
77 0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
78 0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
79 0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
80 0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
81 0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
82 0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
83 0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
84 0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
85 0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
86 0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
87 0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
88 0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
89 0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
90 0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
91 0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
92 0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
93 0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
94 0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
95 0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
96 0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
97 0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
98 0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
99 0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
100 0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
101 0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
102 0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00 /* sqrt(1/1f8)..sqrt(1/1ff) */
105 /* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2. */
107 #if GMP_NUMB_BITS > 32
108 #define MAGIC CNST_LIMB(0x10000000000) /* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
110 #define MAGIC CNST_LIMB(0x100000) /* 0xfee6f < MAGIC < 0x29cbc8 */
114 mpn_sqrtrem1 (mp_ptr rp
, mp_limb_t a0
)
116 #if GMP_NUMB_BITS > 32
119 mp_limb_t x0
, t2
, t
, x2
;
122 ASSERT_ALWAYS (GMP_NAIL_BITS
== 0);
123 ASSERT_ALWAYS (GMP_LIMB_BITS
== 32 || GMP_LIMB_BITS
== 64);
124 ASSERT (a0
>= GMP_NUMB_HIGHBIT
/ 2);
126 /* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),
127 since we can do the former without division. As part of the last
128 iteration convert from 1/sqrt(a) to sqrt(a). */
130 abits
= a0
>> (GMP_LIMB_BITS
- 1 - 8); /* extract bits for table lookup */
131 x0
= 0x100 | invsqrttab
[abits
- 0x80]; /* initial 1/sqrt(a) */
133 /* x0 is now an 8 bits approximation of 1/sqrt(a0) */
135 #if GMP_NUMB_BITS > 32
136 a1
= a0
>> (GMP_LIMB_BITS
- 1 - 32);
137 t
= (mp_limb_signed_t
) (CNST_LIMB(0x2000000000000) - 0x30000 - a1
* x0
* x0
) >> 16;
138 x0
= (x0
<< 16) + ((mp_limb_signed_t
) (x0
* t
) >> (16+2));
140 /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
142 t2
= x0
* (a0
>> (32-8));
144 t
= ((mp_limb_signed_t
) ((a0
<< 14) - t
* t
- MAGIC
) >> (32-8));
145 x0
= t2
+ ((mp_limb_signed_t
) (x0
* t
) >> 15);
148 t2
= x0
* (a0
>> (16-8));
150 t
= ((mp_limb_signed_t
) ((a0
<< 6) - t
* t
- MAGIC
) >> (16-8));
151 x0
= t2
+ ((mp_limb_signed_t
) (x0
* t
) >> 7);
155 /* x0 is now a full limb approximation of sqrt(a0) */
158 if (x2
+ 2*x0
<= a0
- 1)
169 #define Prec (GMP_NUMB_BITS >> 1)
171 /* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
172 return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
174 mpn_sqrtrem2 (mp_ptr sp
, mp_ptr rp
, mp_srcptr np
)
176 mp_limb_t q
, u
, np0
, sp0
, rp0
, q2
;
179 ASSERT (np
[1] >= GMP_NUMB_HIGHBIT
/ 2);
182 sp0
= mpn_sqrtrem1 (rp
, np
[1]);
184 /* rp0 <= 2*sp0 < 2^(Prec + 1) */
185 rp0
= (rp0
<< (Prec
- 1)) + (np0
>> (Prec
+ 1));
187 /* q <= 2^Prec, if q = 2^Prec, reduce the overestimate. */
189 /* now we have q < 2^Prec */
191 /* now we have (rp[0]<<Prec + np0>>Prec)/2 = q * sp0 + u */
192 sp0
= (sp0
<< Prec
) | q
;
193 cc
= u
>> (Prec
- 1);
194 rp0
= ((u
<< (Prec
+ 1)) & GMP_NUMB_MASK
) + (np0
& ((CNST_LIMB (1) << (Prec
+ 1)) - 1));
195 /* subtract q * q from rp */
213 /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
214 and in {np, n} the low n limbs of the remainder, returns the high
215 limb of the remainder (which is 0 or 1).
216 Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
217 where B=2^GMP_NUMB_BITS.
218 Needs a scratch of n/2+1 limbs. */
220 mpn_dc_sqrtrem (mp_ptr sp
, mp_ptr np
, mp_size_t n
, mp_limb_t approx
, mp_ptr scratch
)
222 mp_limb_t q
; /* carry out of {sp, n} */
223 int c
, b
; /* carry out of remainder */
226 ASSERT (np
[2 * n
- 1] >= GMP_NUMB_HIGHBIT
/ 2);
229 c
= mpn_sqrtrem2 (sp
, np
, np
);
234 q
= mpn_dc_sqrtrem (sp
+ l
, np
+ 2 * l
, h
, 0, scratch
);
236 ASSERT_CARRY (mpn_sub_n (np
+ 2 * l
, np
+ 2 * l
, sp
+ l
, h
));
237 TRACE(printf("tdiv_qr(,,,,%u,,%u) -> %u\n", (unsigned) n
, (unsigned) h
, (unsigned) (n
- h
+ 1)));
238 mpn_tdiv_qr (scratch
, np
+ l
, 0, np
+ l
, n
, sp
+ l
, h
);
241 mpn_rshift (sp
, scratch
, l
, 1);
242 sp
[l
- 1] |= (q
<< (GMP_NUMB_BITS
- 1)) & GMP_NUMB_MASK
;
243 if (UNLIKELY ((sp
[0] & approx
) != 0)) /* (sp[0] & mask) > 1 */
244 return 1; /* Remainder is non-zero */
247 c
= mpn_add_n (np
+ l
, np
+ l
, sp
+ l
, h
);
248 TRACE(printf("sqr(,,%u)\n", (unsigned) l
));
249 mpn_sqr (np
+ n
, sp
, l
);
250 b
= q
+ mpn_sub_n (np
, np
, np
+ n
, 2 * l
);
251 c
-= (l
== h
) ? b
: mpn_sub_1 (np
+ 2 * l
, np
+ 2 * l
, 1, (mp_limb_t
) b
);
255 q
= mpn_add_1 (sp
+ l
, sp
+ l
, h
, q
);
256 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
257 c
+= mpn_addlsh1_n_ip1 (np
, sp
, n
) + 2 * q
;
259 c
+= mpn_addmul_1 (np
, sp
, n
, CNST_LIMB(2)) + 2 * q
;
261 c
-= mpn_sub_1 (np
, np
, n
, CNST_LIMB(1));
262 q
-= mpn_sub_1 (sp
, sp
, n
, CNST_LIMB(1));
271 mpn_divappr_q (mp_ptr qp
, mp_srcptr np
, mp_size_t nn
, mp_srcptr dp
, mp_size_t dn
, mp_ptr scratch
)
277 ASSERT ((dp
[dn
-1] & GMP_NUMB_HIGHBIT
) != 0);
279 MPN_COPY (scratch
, np
, nn
);
280 invert_pi1 (inv
, dp
[dn
-1], dp
[dn
-2]);
281 if (BELOW_THRESHOLD (dn
, DC_DIVAPPR_Q_THRESHOLD
))
282 qh
= mpn_sbpi1_divappr_q (qp
, scratch
, nn
, dp
, dn
, inv
.inv32
);
283 else if (BELOW_THRESHOLD (dn
, MU_DIVAPPR_Q_THRESHOLD
))
284 qh
= mpn_dcpi1_divappr_q (qp
, scratch
, nn
, dp
, dn
, &inv
);
287 mp_size_t itch
= mpn_mu_divappr_q_itch (nn
, dn
, 0);
290 /* Sadly, scratch is too small. */
291 qh
= mpn_mu_divappr_q (qp
, np
, nn
, dp
, dn
, TMP_ALLOC_LIMBS (itch
));
298 /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n-odd},
299 returns zero if the operand was a perfect square, one otherwise.
300 Assumes {np, 2n-odd}*4^nsh is normalized, i.e. B > np[2n-1-odd]*4^nsh >= B/4
301 where B=2^GMP_NUMB_BITS.
302 THINK: In the odd case, three more (dummy) limbs are taken into account,
303 when nsh is maximal, two limbs are discarded from the result of the
304 division. Too much? Is a single dummy limb enough? */
306 mpn_dc_sqrt (mp_ptr sp
, mp_srcptr np
, mp_size_t n
, unsigned nsh
, unsigned odd
)
308 mp_limb_t q
; /* carry out of {sp, n} */
309 int c
; /* carry out of remainder */
311 mp_ptr qp
, tp
, scratch
;
315 ASSERT (np
[2 * n
- 1 - odd
] != 0);
317 ASSERT (nsh
< GMP_NUMB_BITS
/ 2);
321 ASSERT (n
>= l
+ 2 && l
+ 2 >= h
&& h
> l
&& l
>= 1 + odd
);
322 scratch
= TMP_ALLOC_LIMBS (l
+ 2 * n
+ 5 - USE_DIVAPPR_Q
); /* n + 2-USE_DIVAPPR_Q */
323 tp
= scratch
+ n
+ 2 - USE_DIVAPPR_Q
; /* n + h + 1, but tp [-1] is writable */
326 /* o is used to exactly set the lowest bits of the dividend, is it needed? */
327 int o
= l
> (1 + odd
);
328 ASSERT_NOCARRY (mpn_lshift (tp
- o
, np
+ l
- 1 - o
- odd
, n
+ h
+ 1 + o
, 2 * nsh
));
331 MPN_COPY (tp
, np
+ l
- 1 - odd
, n
+ h
+ 1);
332 q
= mpn_dc_sqrtrem (sp
+ l
, tp
+ l
+ 1, h
, 0, scratch
);
334 ASSERT_CARRY (mpn_sub_n (tp
+ l
+ 1, tp
+ l
+ 1, sp
+ l
, h
));
335 qp
= tp
+ n
+ 1; /* l + 2 */
336 TRACE(printf("div(appr)_q(,,%u,,%u) -> %u \n", (unsigned) n
+1, (unsigned) h
, (unsigned) (n
+ 1 - h
+ 1)));
338 mpn_divappr_q (qp
, tp
, n
+ 1, sp
+ l
, h
, scratch
);
340 mpn_div_q (qp
, tp
, n
+ 1, sp
+ l
, h
, scratch
);
346 /* FIXME: if s!=0 we will shift later, a noop on this area. */
347 MPN_FILL (sp
, l
, GMP_NUMB_MAX
);
351 /* FIXME: if s!=0 we will shift again later, shift just once. */
352 mpn_rshift (sp
, qp
+ 1, l
, 1);
353 sp
[l
- 1] |= q
<< (GMP_NUMB_BITS
- 1);
354 if (((qp
[0] >> (2 + USE_DIVAPPR_Q
)) | /* < 3 + 4*USE_DIVAPPR_Q */
355 (qp
[1] & (GMP_NUMB_MASK
>> ((GMP_NUMB_BITS
>> odd
)- nsh
- 1)))) == 0)
358 /* Approximation is not good enough, the extra limb(+ nsh bits)
359 is smaller than needed to absorb the possible error. */
360 /* {qp + 1, l + 1} equals 2*{sp, l} */
361 /* FIXME: use mullo or wrap-around, or directly evaluate
362 remainder with a single sqrmod_bnm1. */
363 TRACE(printf("mul(,,%u,,%u)\n", (unsigned) h
, (unsigned) (l
+1)));
364 ASSERT_NOCARRY (mpn_mul (scratch
, sp
+ l
, h
, qp
+ 1, l
+ 1));
365 /* Compute the remainder of the previous mpn_div(appr)_q. */
366 cy
= mpn_sub_n (tp
+ 1, tp
+ 1, scratch
, h
);
367 #if USE_DIVAPPR_Q || WANT_ASSERT
368 MPN_DECR_U (tp
+ 1 + h
, l
, cy
);
370 ASSERT (mpn_cmp (tp
+ 1 + h
, scratch
+ h
, l
) <= 0);
371 if (mpn_cmp (tp
+ 1 + h
, scratch
+ h
, l
) < 0)
373 /* May happen only if div result was not exact. */
374 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
375 cy
= mpn_addlsh1_n_ip1 (tp
+ 1, sp
+ l
, h
);
377 cy
= mpn_addmul_1 (tp
+ 1, sp
+ l
, h
, CNST_LIMB(2));
379 ASSERT_NOCARRY (mpn_add_1 (tp
+ 1 + h
, tp
+ 1 + h
, l
, cy
));
380 MPN_DECR_U (sp
, l
, 1);
382 /* Can the root be exact when a correction was needed? We
383 did not find an example, but it depends on divappr
384 internals, and we can not assume it true in general...*/
386 #else /* WANT_ASSERT */
387 ASSERT (mpn_cmp (tp
+ 1 + h
, scratch
+ h
, l
) == 0);
390 if (mpn_zero_p (tp
+ l
+ 1, h
- l
))
392 TRACE(printf("sqr(,,%u)\n", (unsigned) l
));
393 mpn_sqr (scratch
, sp
, l
);
394 c
= mpn_cmp (tp
+ 1, scratch
+ l
, l
);
399 mpn_lshift (tp
, np
, l
, 2 * nsh
);
402 c
= mpn_cmp (np
, scratch
+ odd
, l
- odd
);
406 MPN_DECR_U (sp
, l
, 1);
414 if ((odd
| nsh
) != 0)
415 mpn_rshift (sp
, sp
, n
, nsh
+ (odd
? GMP_NUMB_BITS
/ 2 : 0));
421 mpn_sqrtrem (mp_ptr sp
, mp_ptr rp
, mp_srcptr np
, mp_size_t nn
)
423 mp_limb_t
*tp
, s0
[1], cc
, high
, rl
;
431 ASSERT (np
[nn
- 1] != 0);
432 ASSERT (rp
== NULL
|| MPN_SAME_OR_SEPARATE_P (np
, rp
, nn
));
433 ASSERT (rp
== NULL
|| ! MPN_OVERLAP_P (sp
, (nn
+ 1) / 2, rp
, nn
));
434 ASSERT (! MPN_OVERLAP_P (sp
, (nn
+ 1) / 2, np
, nn
));
437 if (high
& (GMP_NUMB_HIGHBIT
| (GMP_NUMB_HIGHBIT
/ 2)))
441 count_leading_zeros (c
, high
);
444 c
= c
/ 2; /* we have to shift left by 2c bits to normalize {np, nn} */
449 sp
[0] = mpn_sqrtrem1 (&rl
, high
);
455 cc
= mpn_sqrtrem1 (&rl
, high
<< (2*c
)) >> c
;
458 rp
[0] = rl
= high
- cc
*cc
;
462 tn
= (nn
+ 1) / 2; /* 2*tn is the smallest even integer >= nn */
464 if ((rp
== NULL
) && (nn
> 8))
465 return mpn_dc_sqrt (sp
, np
, tn
, c
, nn
& 1);
467 if (((nn
& 1) | c
) != 0)
471 TMP_ALLOC_LIMBS_2 (tp
, 2 * tn
, scratch
, tn
/ 2 + 1);
472 tp
[0] = 0; /* needed only when 2*tn > nn, but saves a test */
474 mpn_lshift (tp
+ (nn
& 1), np
, nn
, 2 * c
);
476 MPN_COPY (tp
+ (nn
& 1), np
, nn
);
477 c
+= (nn
& 1) ? GMP_NUMB_BITS
/ 2 : 0; /* c now represents k */
478 mask
= (CNST_LIMB (1) << c
) - 1;
479 rl
= mpn_dc_sqrtrem (sp
, tp
, tn
, (rp
== NULL
) ? mask
- 1 : 0, scratch
);
480 /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
481 thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
482 s0
[0] = sp
[0] & mask
; /* S mod 2^k */
483 rl
+= mpn_addmul_1 (tp
, sp
, tn
, 2 * s0
[0]); /* R = R + 2*s0*S */
484 cc
= mpn_submul_1 (tp
, s0
, 1, s0
[0]);
485 rl
-= (tn
> 1) ? mpn_sub_1 (tp
+ 1, tp
+ 1, tn
- 1, cc
) : cc
;
486 mpn_rshift (sp
, sp
, tn
, c
);
491 if (c
< GMP_NUMB_BITS
)
499 mpn_rshift (rp
, tp
, tn
, c
);
501 MPN_COPY_INCR (rp
, tp
, tn
);
508 if (rp
== NULL
) /* nn <= 8 */
509 rp
= TMP_SALLOC_LIMBS (nn
);
510 MPN_COPY (rp
, np
, nn
);
512 rn
= tn
+ (rp
[tn
] = mpn_dc_sqrtrem (sp
, rp
, tn
, 0, TMP_ALLOC_LIMBS(tn
/ 2 + 1)));
515 MPN_NORMALIZE (rp
, rn
);