1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 /*****************************************************************************
31 *****************************************************************************
33 * Algorithm description:
35 * if(exponent_x is odd)
36 * scale coefficient_x by 10, adjust exponent
37 * - get lower estimate for number of digits in coefficient_x
38 * - scale coefficient x to between 31 and 33 decimal digits
39 * - in parallel, check for exact case and return if true
40 * - get high part of result coefficient using double precision sqrt
41 * - compute remainder and refine coefficient in one iteration (which
42 * modifies it by at most 1)
43 * - result exponent is easy to compute from the adjusted arg. exponent
45 ****************************************************************************/
47 #include "bid_internal.h"
48 #include "bid_sqrt_macros.h"
49 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
52 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
55 extern double sqrt (double);
57 #if DECIMAL_CALL_BY_REFERENCE
60 bid64_sqrt (UINT64
* pres
,
62 px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
68 bid64_sqrt (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
69 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
72 UINT64 sign_x
, coefficient_x
;
73 UINT64 Q
, Q2
, A10
, C4
, R
, R2
, QE
, res
;
77 double da
, dq
, da_h
, da_l
, dqe
;
78 int exponent_x
, exponent_q
, bin_expon_cx
;
81 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
82 fexcept_t binaryflags
= 0;
85 #if DECIMAL_CALL_BY_REFERENCE
86 #if !DECIMAL_GLOBAL_ROUNDING
87 _IDEC_round rnd_mode
= *prnd_mode
;
92 // unpack arguments, check for NaN or Infinity
93 if (!unpack_BID64 (&sign_x
, &exponent_x
, &coefficient_x
, x
)) {
94 // x is Inf. or NaN or 0
95 if ((x
& INFINITY_MASK64
) == INFINITY_MASK64
) {
97 if ((coefficient_x
& SSNAN_MASK64
) == SINFINITY_MASK64
) // -Infinity
100 #ifdef SET_STATUS_FLAGS
101 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
104 #ifdef SET_STATUS_FLAGS
105 if ((x
& SNAN_MASK64
) == SNAN_MASK64
) // sNaN
106 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
108 BID_RETURN (res
& QUIET_MASK64
);
111 exponent_x
= (exponent_x
+ DECIMAL_EXPONENT_BIAS
) >> 1;
112 res
= sign_x
| (((UINT64
) exponent_x
) << 53);
116 if (sign_x
&& coefficient_x
) {
118 #ifdef SET_STATUS_FLAGS
119 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
123 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
124 (void) fegetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
126 //--- get number of bits in the coefficient of x ---
127 tempx
.d
= (float) coefficient_x
;
128 bin_expon_cx
= ((tempx
.i
>> 23) & 0xff) - 0x7f;
129 digits_x
= estimate_decimal_digits
[bin_expon_cx
];
130 // add test for range
131 if (coefficient_x
>= power10_index_binexp
[bin_expon_cx
])
135 if (exponent_x
& 1) {
136 A10
= (A10
<< 2) + A10
;
140 dqe
= sqrt ((double) A10
);
142 if (QE
* QE
== A10
) {
144 very_fast_get_BID64 (0, (exponent_x
+ DECIMAL_EXPONENT_BIAS
) >> 1,
146 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
147 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
151 // if exponent is odd, scale coefficient by 10
152 scale
= 31 - digits_x
;
153 exponent_q
= exponent_x
- scale
;
154 scale
+= (exponent_q
& 1); // exp. bias is even
156 CT
= power10_table_128
[scale
];
157 __mul_64x128_short (CA
, coefficient_x
, CT
);
160 t_scale
.i
= 0x43f0000000000000ull
;
164 da
= da_h
* t_scale
.d
+ da_l
;
170 // get sign(sqrt(CA)-Q)
172 R
= ((SINT64
) R
) >> 63;
175 exponent_q
= (exponent_q
+ DECIMAL_EXPONENT_BIAS
) >> 1;
177 #ifdef SET_STATUS_FLAGS
178 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
181 #ifndef IEEE_ROUND_NEAREST
182 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
183 if (!((rnd_mode
) & 3)) {
191 // get sign(-sqrt(CA)+Midpoint)
193 R2
= ((SINT64
) R2
) >> 63;
197 #ifndef IEEE_ROUND_NEAREST
198 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
202 if ((SINT64
) (Q
* Q
- C4
) > 0)
204 if (rnd_mode
== ROUNDING_UP
)
210 res
= fast_get_BID64 (0, exponent_q
, Q
);
211 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
212 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
218 TYPE0_FUNCTION_ARG1 (UINT64
, bid64q_sqrt
, x
)
220 UINT256 M256
, C4
, C8
;
221 UINT128 CX
, CX2
, A10
, S2
, T128
, CS
, CSM
, CS2
, C256
, CS1
,
222 mul_factor2_long
= { {0x0ull
, 0x0ull
} }, QH
, Tmp
, TP128
, Qh
, Ql
;
223 UINT64 sign_x
, Carry
, B10
, res
, mul_factor
, mul_factor2
= 0x0ull
, CS0
;
226 int exponent_x
, bin_expon_cx
, done
= 0;
227 int digits
, scale
, exponent_q
= 0, exact
= 1, amount
, extra_digits
;
228 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
229 fexcept_t binaryflags
= 0;
232 // unpack arguments, check for NaN or Infinity
233 if (!unpack_BID128_value (&sign_x
, &exponent_x
, &CX
, x
)) {
236 if ((x
.w
[1] & 0x7c00000000000000ull
) == 0x7c00000000000000ull
) {
237 #ifdef SET_STATUS_FLAGS
238 if ((x
.w
[1] & 0x7e00000000000000ull
) == 0x7e00000000000000ull
) // sNaN
239 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
241 Tmp
.w
[1] = (CX
.w
[1] & 0x00003fffffffffffull
);
243 TP128
= reciprocals10_128
[18];
244 __mul_128x128_full (Qh
, Ql
, Tmp
, TP128
);
245 amount
= recip_scale
[18];
246 __shr_128 (Tmp
, Qh
, amount
);
247 res
= (CX
.w
[1] & 0xfc00000000000000ull
) | Tmp
.w
[0];
251 if ((x
.w
[1] & 0x7800000000000000ull
) == 0x7800000000000000ull
) {
254 res
= 0x7c00000000000000ull
;
255 #ifdef SET_STATUS_FLAGS
256 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
264 ((exponent_x
- DECIMAL_EXPONENT_BIAS_128
) >> 1) +
265 DECIMAL_EXPONENT_BIAS
;
268 if (exponent_x
> DECIMAL_MAX_EXPON_64
)
269 exponent_x
= DECIMAL_MAX_EXPON_64
;
270 //res= sign_x | (((UINT64)exponent_x)<<53);
271 res
= get_BID64 (sign_x
, exponent_x
, 0, rnd_mode
, pfpsf
);
275 res
= 0x7c00000000000000ull
;
276 #ifdef SET_STATUS_FLAGS
277 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
281 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
282 (void) fegetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
289 fx
.d
= (float) CX
.w
[1] * f64
.d
+ (float) CX
.w
[0];
290 bin_expon_cx
= ((fx
.i
>> 23) & 0xff) - 0x7f;
291 digits
= estimate_decimal_digits
[bin_expon_cx
];
294 if (exponent_x
& 1) {
295 A10
.w
[1] = (CX
.w
[1] << 3) | (CX
.w
[0] >> 61);
296 A10
.w
[0] = CX
.w
[0] << 3;
297 CX2
.w
[1] = (CX
.w
[1] << 1) | (CX
.w
[0] >> 63);
298 CX2
.w
[0] = CX
.w
[0] << 1;
299 __add_128_128 (A10
, A10
, CX2
);
302 C256
.w
[1] = A10
.w
[1];
303 C256
.w
[0] = A10
.w
[0];
304 CS
.w
[0] = short_sqrt128 (A10
);
307 // check for exact result
308 if (CS
.w
[0] < 10000000000000000ull) {
309 if (CS
.w
[0] * CS
.w
[0] == A10
.w
[0]) {
310 __sqr64_fast (S2
, CS
.w
[0]);
311 if (S2
.w
[1] == A10
.w
[1]) // && S2.w[0]==A10.w[0])
315 ((exponent_x
- DECIMAL_EXPONENT_BIAS_128
) >> 1) +
316 DECIMAL_EXPONENT_BIAS
, CS
.w
[0], rnd_mode
, pfpsf
);
317 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
318 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
323 if (CS
.w
[0] >= 1000000000000000ull) {
325 exponent_q
= exponent_x
;
326 C256
.w
[1] = A10
.w
[1];
327 C256
.w
[0] = A10
.w
[0];
329 #ifdef SET_STATUS_FLAGS
330 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
334 B10
= 0x3333333333333334ull
;
335 __mul_64x64_to_128_full (CS2
, CS
.w
[0], B10
);
337 if (CS
.w
[0] != ((CS0
<< 3) + (CS0
<< 1))) {
338 #ifdef SET_STATUS_FLAGS
339 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
345 exponent_q
= exponent_x
+ 2;
348 if (CS
.w
[0] >= 10000000000000000ull) {
349 __mul_64x64_to_128_full (CS2
, CS
.w
[0], B10
);
351 if (CS
.w
[0] != ((CS0
<< 3) + (CS0
<< 1))) {
352 #ifdef SET_STATUS_FLAGS
353 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
363 CS0
= CS
.w
[0] * mul_factor
;
364 __sqr64_fast (CS1
, CS0
)
365 if ((CS1
.w
[0] != A10
.w
[0]) || (CS1
.w
[1] != A10
.w
[1])) {
366 #ifdef SET_STATUS_FLAGS
367 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
375 // get number of digits in CX
376 D
= CX
.w
[1] - power10_index_binexp_128
[bin_expon_cx
].w
[1];
378 || (!D
&& CX
.w
[0] >= power10_index_binexp_128
[bin_expon_cx
].w
[0]))
381 // if exponent is odd, scale coefficient by 10
383 exponent_q
= exponent_x
- scale
;
384 scale
+= (exponent_q
& 1); // exp. bias is even
386 T128
= power10_table_128
[scale
];
387 __mul_128x128_low (C256
, CX
, T128
);
390 CS
.w
[0] = short_sqrt128 (C256
);
392 //printf("CS=%016I64x\n",CS.w[0]);
395 ((exponent_q
- DECIMAL_EXPONENT_BIAS_128
) >> 1) +
396 DECIMAL_EXPONENT_BIAS
;
397 if ((exponent_q
< 0) && (exponent_q
+ MAX_FORMAT_DIGITS
>= 0)) {
398 extra_digits
= -exponent_q
;
401 // get coeff*(2^M[extra_digits])/10^extra_digits
402 __mul_64x64_to_128 (QH
, CS
.w
[0], reciprocals10_64
[extra_digits
]);
404 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
405 amount
= short_recip_scale
[extra_digits
];
407 CS0
= QH
.w
[1] >> amount
;
409 #ifdef SET_STATUS_FLAGS
411 if (CS
.w
[0] != CS0
* power10_table_128
[extra_digits
].w
[0])
415 __set_status_flags (pfpsf
, UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
421 mul_factor
*= power10_table_128
[extra_digits
].w
[0];
422 __mul_64x64_to_128 (mul_factor2_long
, mul_factor
, mul_factor
);
423 if (mul_factor2_long
.w
[1])
426 mul_factor2
= mul_factor2_long
.w
[1];
429 C4
.w
[1] = (C256
.w
[1] << 2) | (C256
.w
[0] >> 62);
430 C4
.w
[0] = C256
.w
[0] << 2;
432 #ifndef IEEE_ROUND_NEAREST
433 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
434 if (!((rnd_mode
) & 3)) {
437 // compare to midpoints
438 CSM
.w
[0] = (CS
.w
[0] + CS
.w
[0]) | 1;
439 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C4.w[1],C4.w[0],CSM.w[1],CSM.w[0], CS.w[0]);
441 CSM
.w
[0] *= mul_factor
;
443 __mul_64x64_to_128 (M256
, CSM
.w
[0], CSM
.w
[0]);
444 //__mul_128x128_to_256(M256, CSM, CSM);
446 if (C4
.w
[1] > M256
.w
[1] ||
447 (C4
.w
[1] == M256
.w
[1] && C4
.w
[0] > M256
.w
[0])) {
451 C8
.w
[0] = CS
.w
[0] << 3;
455 __mul_64x64_to_128 (C8
, C8
.w
[0], mul_factor2
);
457 __mul_64x128_low (C8
, C8
.w
[0], mul_factor2_long
);
461 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
462 M256
.w
[1] = M256
.w
[1] - C8
.w
[1] - Carry
;
464 // if CSM' > C256, round up
465 if (M256
.w
[1] > C4
.w
[1] ||
466 (M256
.w
[1] == C4
.w
[1] && M256
.w
[0] > C4
.w
[0])) {
472 #ifndef IEEE_ROUND_NEAREST
473 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
477 C8
.w
[0] = CSM
.w
[0] << 1;
479 CSM
.w
[0] *= mul_factor
;
480 __mul_64x64_to_128 (M256
, CSM
.w
[0], CSM
.w
[0]);
484 __mul_64x64_to_128 (C8
, C8
.w
[0], mul_factor2
);
486 __mul_64x128_low (C8
, C8
.w
[0], mul_factor2_long
);
489 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C256.w[1],C256.w[0],M256.w[1],M256.w[0], CS.w[0]);
491 if (M256
.w
[1] > C256
.w
[1] ||
492 (M256
.w
[1] == C256
.w
[1] && M256
.w
[0] > C256
.w
[0])) {
493 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
494 M256
.w
[1] = M256
.w
[1] - Carry
- C8
.w
[1];
501 if ((M256
.w
[1] > C256
.w
[1] ||
502 (M256
.w
[1] == C256
.w
[1] && M256
.w
[0] > C256
.w
[0]))
508 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
509 M256
.w
[1] = M256
.w
[1] - Carry
- C8
.w
[1];
515 if (M256
.w
[1] > C256
.w
[1] ||
516 (M256
.w
[1] == C256
.w
[1] && M256
.w
[0] > C256
.w
[0]))
523 /*__add_carry_out(M256.w[0], Carry, M256.w[0], C8.w[0]);
524 M256.w[1] = M256.w[1] + Carry + C8.w[1];
531 if(M256.w[1]<C256.w[1] ||
532 (M256.w[1]==C256.w[1] && M256.w[0]<=C256.w[0]))
538 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
540 if (((rnd_mode
) != ROUNDING_UP
) || exact
) {
548 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
550 res
= get_BID64 (0, exponent_q
, CS
.w
[0], rnd_mode
, pfpsf
);
551 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
552 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);