1 /* Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 /*****************************************************************************
26 *****************************************************************************
28 * Algorithm description:
30 * if(exponent_x is odd)
31 * scale coefficient_x by 10, adjust exponent
32 * - get lower estimate for number of digits in coefficient_x
33 * - scale coefficient x to between 31 and 33 decimal digits
34 * - in parallel, check for exact case and return if true
35 * - get high part of result coefficient using double precision sqrt
36 * - compute remainder and refine coefficient in one iteration (which
37 * modifies it by at most 1)
38 * - result exponent is easy to compute from the adjusted arg. exponent
40 ****************************************************************************/
42 #include "bid_internal.h"
43 #include "bid_sqrt_macros.h"
44 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
47 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
50 extern double sqrt (double);
52 #if DECIMAL_CALL_BY_REFERENCE
55 bid64_sqrt (UINT64
* pres
,
57 px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
63 bid64_sqrt (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
64 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
67 UINT64 sign_x
, coefficient_x
;
68 UINT64 Q
, Q2
, A10
, C4
, R
, R2
, QE
, res
;
72 double da
, dq
, da_h
, da_l
, dqe
;
73 int exponent_x
, exponent_q
, bin_expon_cx
;
76 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
77 fexcept_t binaryflags
= 0;
80 #if DECIMAL_CALL_BY_REFERENCE
81 #if !DECIMAL_GLOBAL_ROUNDING
82 _IDEC_round rnd_mode
= *prnd_mode
;
87 // unpack arguments, check for NaN or Infinity
88 if (!unpack_BID64 (&sign_x
, &exponent_x
, &coefficient_x
, x
)) {
89 // x is Inf. or NaN or 0
90 if ((x
& INFINITY_MASK64
) == INFINITY_MASK64
) {
92 if ((coefficient_x
& SSNAN_MASK64
) == SINFINITY_MASK64
) // -Infinity
95 #ifdef SET_STATUS_FLAGS
96 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
99 #ifdef SET_STATUS_FLAGS
100 if ((x
& SNAN_MASK64
) == SNAN_MASK64
) // sNaN
101 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
103 BID_RETURN (res
& QUIET_MASK64
);
106 exponent_x
= (exponent_x
+ DECIMAL_EXPONENT_BIAS
) >> 1;
107 res
= sign_x
| (((UINT64
) exponent_x
) << 53);
111 if (sign_x
&& coefficient_x
) {
113 #ifdef SET_STATUS_FLAGS
114 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
118 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
119 (void) fegetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
121 //--- get number of bits in the coefficient of x ---
122 tempx
.d
= (float) coefficient_x
;
123 bin_expon_cx
= ((tempx
.i
>> 23) & 0xff) - 0x7f;
124 digits_x
= estimate_decimal_digits
[bin_expon_cx
];
125 // add test for range
126 if (coefficient_x
>= power10_index_binexp
[bin_expon_cx
])
130 if (exponent_x
& 1) {
131 A10
= (A10
<< 2) + A10
;
135 dqe
= sqrt ((double) A10
);
137 if (QE
* QE
== A10
) {
139 very_fast_get_BID64 (0, (exponent_x
+ DECIMAL_EXPONENT_BIAS
) >> 1,
141 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
142 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
146 // if exponent is odd, scale coefficient by 10
147 scale
= 31 - digits_x
;
148 exponent_q
= exponent_x
- scale
;
149 scale
+= (exponent_q
& 1); // exp. bias is even
151 CT
= power10_table_128
[scale
];
152 __mul_64x128_short (CA
, coefficient_x
, CT
);
155 t_scale
.i
= 0x43f0000000000000ull
;
159 da
= da_h
* t_scale
.d
+ da_l
;
165 // get sign(sqrt(CA)-Q)
167 R
= ((SINT64
) R
) >> 63;
170 exponent_q
= (exponent_q
+ DECIMAL_EXPONENT_BIAS
) >> 1;
172 #ifdef SET_STATUS_FLAGS
173 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
176 #ifndef IEEE_ROUND_NEAREST
177 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
178 if (!((rnd_mode
) & 3)) {
186 // get sign(-sqrt(CA)+Midpoint)
188 R2
= ((SINT64
) R2
) >> 63;
192 #ifndef IEEE_ROUND_NEAREST
193 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
197 if ((SINT64
) (Q
* Q
- C4
) > 0)
199 if (rnd_mode
== ROUNDING_UP
)
205 res
= fast_get_BID64 (0, exponent_q
, Q
);
206 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
207 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
213 TYPE0_FUNCTION_ARG1 (UINT64
, bid64q_sqrt
, x
)
215 UINT256 M256
, C4
, C8
;
216 UINT128 CX
, CX2
, A10
, S2
, T128
, CS
, CSM
, CS2
, C256
, CS1
,
217 mul_factor2_long
= { {0x0ull
, 0x0ull
} }, QH
, Tmp
, TP128
, Qh
, Ql
;
218 UINT64 sign_x
, Carry
, B10
, res
, mul_factor
, mul_factor2
= 0x0ull
, CS0
;
221 int exponent_x
, bin_expon_cx
, done
= 0;
222 int digits
, scale
, exponent_q
= 0, exact
= 1, amount
, extra_digits
;
223 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
224 fexcept_t binaryflags
= 0;
227 // unpack arguments, check for NaN or Infinity
228 if (!unpack_BID128_value (&sign_x
, &exponent_x
, &CX
, x
)) {
231 if ((x
.w
[1] & 0x7c00000000000000ull
) == 0x7c00000000000000ull
) {
232 #ifdef SET_STATUS_FLAGS
233 if ((x
.w
[1] & 0x7e00000000000000ull
) == 0x7e00000000000000ull
) // sNaN
234 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
236 Tmp
.w
[1] = (CX
.w
[1] & 0x00003fffffffffffull
);
238 TP128
= reciprocals10_128
[18];
239 __mul_128x128_full (Qh
, Ql
, Tmp
, TP128
);
240 amount
= recip_scale
[18];
241 __shr_128 (Tmp
, Qh
, amount
);
242 res
= (CX
.w
[1] & 0xfc00000000000000ull
) | Tmp
.w
[0];
246 if ((x
.w
[1] & 0x7800000000000000ull
) == 0x7800000000000000ull
) {
249 res
= 0x7c00000000000000ull
;
250 #ifdef SET_STATUS_FLAGS
251 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
259 ((exponent_x
- DECIMAL_EXPONENT_BIAS_128
) >> 1) +
260 DECIMAL_EXPONENT_BIAS
;
263 if (exponent_x
> DECIMAL_MAX_EXPON_64
)
264 exponent_x
= DECIMAL_MAX_EXPON_64
;
265 //res= sign_x | (((UINT64)exponent_x)<<53);
266 res
= get_BID64 (sign_x
, exponent_x
, 0, rnd_mode
, pfpsf
);
270 res
= 0x7c00000000000000ull
;
271 #ifdef SET_STATUS_FLAGS
272 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
276 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
277 (void) fegetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
284 fx
.d
= (float) CX
.w
[1] * f64
.d
+ (float) CX
.w
[0];
285 bin_expon_cx
= ((fx
.i
>> 23) & 0xff) - 0x7f;
286 digits
= estimate_decimal_digits
[bin_expon_cx
];
289 if (exponent_x
& 1) {
290 A10
.w
[1] = (CX
.w
[1] << 3) | (CX
.w
[0] >> 61);
291 A10
.w
[0] = CX
.w
[0] << 3;
292 CX2
.w
[1] = (CX
.w
[1] << 1) | (CX
.w
[0] >> 63);
293 CX2
.w
[0] = CX
.w
[0] << 1;
294 __add_128_128 (A10
, A10
, CX2
);
297 C256
.w
[1] = A10
.w
[1];
298 C256
.w
[0] = A10
.w
[0];
299 CS
.w
[0] = short_sqrt128 (A10
);
302 // check for exact result
303 if (CS
.w
[0] < 10000000000000000ull) {
304 if (CS
.w
[0] * CS
.w
[0] == A10
.w
[0]) {
305 __sqr64_fast (S2
, CS
.w
[0]);
306 if (S2
.w
[1] == A10
.w
[1]) // && S2.w[0]==A10.w[0])
310 ((exponent_x
- DECIMAL_EXPONENT_BIAS_128
) >> 1) +
311 DECIMAL_EXPONENT_BIAS
, CS
.w
[0], rnd_mode
, pfpsf
);
312 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
313 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
318 if (CS
.w
[0] >= 1000000000000000ull) {
320 exponent_q
= exponent_x
;
321 C256
.w
[1] = A10
.w
[1];
322 C256
.w
[0] = A10
.w
[0];
324 #ifdef SET_STATUS_FLAGS
325 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
329 B10
= 0x3333333333333334ull
;
330 __mul_64x64_to_128_full (CS2
, CS
.w
[0], B10
);
332 if (CS
.w
[0] != ((CS0
<< 3) + (CS0
<< 1))) {
333 #ifdef SET_STATUS_FLAGS
334 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
340 exponent_q
= exponent_x
+ 2;
343 if (CS
.w
[0] >= 10000000000000000ull) {
344 __mul_64x64_to_128_full (CS2
, CS
.w
[0], B10
);
346 if (CS
.w
[0] != ((CS0
<< 3) + (CS0
<< 1))) {
347 #ifdef SET_STATUS_FLAGS
348 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
358 CS0
= CS
.w
[0] * mul_factor
;
359 __sqr64_fast (CS1
, CS0
)
360 if ((CS1
.w
[0] != A10
.w
[0]) || (CS1
.w
[1] != A10
.w
[1])) {
361 #ifdef SET_STATUS_FLAGS
362 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
370 // get number of digits in CX
371 D
= CX
.w
[1] - power10_index_binexp_128
[bin_expon_cx
].w
[1];
373 || (!D
&& CX
.w
[0] >= power10_index_binexp_128
[bin_expon_cx
].w
[0]))
376 // if exponent is odd, scale coefficient by 10
378 exponent_q
= exponent_x
- scale
;
379 scale
+= (exponent_q
& 1); // exp. bias is even
381 T128
= power10_table_128
[scale
];
382 __mul_128x128_low (C256
, CX
, T128
);
385 CS
.w
[0] = short_sqrt128 (C256
);
387 //printf("CS=%016I64x\n",CS.w[0]);
390 ((exponent_q
- DECIMAL_EXPONENT_BIAS_128
) >> 1) +
391 DECIMAL_EXPONENT_BIAS
;
392 if ((exponent_q
< 0) && (exponent_q
+ MAX_FORMAT_DIGITS
>= 0)) {
393 extra_digits
= -exponent_q
;
396 // get coeff*(2^M[extra_digits])/10^extra_digits
397 __mul_64x64_to_128 (QH
, CS
.w
[0], reciprocals10_64
[extra_digits
]);
399 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
400 amount
= short_recip_scale
[extra_digits
];
402 CS0
= QH
.w
[1] >> amount
;
404 #ifdef SET_STATUS_FLAGS
406 if (CS
.w
[0] != CS0
* power10_table_128
[extra_digits
].w
[0])
410 __set_status_flags (pfpsf
, UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
416 mul_factor
*= power10_table_128
[extra_digits
].w
[0];
417 __mul_64x64_to_128 (mul_factor2_long
, mul_factor
, mul_factor
);
418 if (mul_factor2_long
.w
[1])
421 mul_factor2
= mul_factor2_long
.w
[1];
424 C4
.w
[1] = (C256
.w
[1] << 2) | (C256
.w
[0] >> 62);
425 C4
.w
[0] = C256
.w
[0] << 2;
427 #ifndef IEEE_ROUND_NEAREST
428 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
429 if (!((rnd_mode
) & 3)) {
432 // compare to midpoints
433 CSM
.w
[0] = (CS
.w
[0] + CS
.w
[0]) | 1;
434 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C4.w[1],C4.w[0],CSM.w[1],CSM.w[0], CS.w[0]);
436 CSM
.w
[0] *= mul_factor
;
438 __mul_64x64_to_128 (M256
, CSM
.w
[0], CSM
.w
[0]);
439 //__mul_128x128_to_256(M256, CSM, CSM);
441 if (C4
.w
[1] > M256
.w
[1] ||
442 (C4
.w
[1] == M256
.w
[1] && C4
.w
[0] > M256
.w
[0])) {
446 C8
.w
[0] = CS
.w
[0] << 3;
450 __mul_64x64_to_128 (C8
, C8
.w
[0], mul_factor2
);
452 __mul_64x128_low (C8
, C8
.w
[0], mul_factor2_long
);
456 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
457 M256
.w
[1] = M256
.w
[1] - C8
.w
[1] - Carry
;
459 // if CSM' > C256, round up
460 if (M256
.w
[1] > C4
.w
[1] ||
461 (M256
.w
[1] == C4
.w
[1] && M256
.w
[0] > C4
.w
[0])) {
467 #ifndef IEEE_ROUND_NEAREST
468 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
472 C8
.w
[0] = CSM
.w
[0] << 1;
474 CSM
.w
[0] *= mul_factor
;
475 __mul_64x64_to_128 (M256
, CSM
.w
[0], CSM
.w
[0]);
479 __mul_64x64_to_128 (C8
, C8
.w
[0], mul_factor2
);
481 __mul_64x128_low (C8
, C8
.w
[0], mul_factor2_long
);
484 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C256.w[1],C256.w[0],M256.w[1],M256.w[0], CS.w[0]);
486 if (M256
.w
[1] > C256
.w
[1] ||
487 (M256
.w
[1] == C256
.w
[1] && M256
.w
[0] > C256
.w
[0])) {
488 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
489 M256
.w
[1] = M256
.w
[1] - Carry
- C8
.w
[1];
496 if ((M256
.w
[1] > C256
.w
[1] ||
497 (M256
.w
[1] == C256
.w
[1] && M256
.w
[0] > C256
.w
[0]))
503 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
504 M256
.w
[1] = M256
.w
[1] - Carry
- C8
.w
[1];
510 if (M256
.w
[1] > C256
.w
[1] ||
511 (M256
.w
[1] == C256
.w
[1] && M256
.w
[0] > C256
.w
[0]))
518 /*__add_carry_out(M256.w[0], Carry, M256.w[0], C8.w[0]);
519 M256.w[1] = M256.w[1] + Carry + C8.w[1];
526 if(M256.w[1]<C256.w[1] ||
527 (M256.w[1]==C256.w[1] && M256.w[0]<=C256.w[0]))
533 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
535 if (((rnd_mode
) != ROUNDING_UP
) || exact
) {
543 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
545 res
= get_BID64 (0, exponent_q
, CS
.w
[0], rnd_mode
, pfpsf
);
546 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
547 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);