1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
30 #include "bid_internal.h"
31 #include "bid_sqrt_macros.h"
32 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
35 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
38 BID128_FUNCTION_ARG1 (bid128_sqrt
, x
)
40 UINT256 M256
, C256
, C4
, C8
;
41 UINT128 CX
, CX1
, CX2
, A10
, S2
, T128
, TP128
, CS
, CSM
, res
;
45 int exponent_x
, bin_expon_cx
;
46 int digits
, scale
, exponent_q
;
47 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
48 fexcept_t binaryflags
= 0;
51 // unpack arguments, check for NaN or Infinity
52 if (!unpack_BID128_value (&sign_x
, &exponent_x
, &CX
, x
)) {
56 if ((x
.w
[1] & 0x7c00000000000000ull
) == 0x7c00000000000000ull
) {
57 #ifdef SET_STATUS_FLAGS
58 if ((x
.w
[1] & 0x7e00000000000000ull
) == 0x7e00000000000000ull
) // sNaN
59 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
61 res
.w
[1] = CX
.w
[1] & QUIET_MASK64
;
65 if ((x
.w
[1] & 0x7800000000000000ull
) == 0x7800000000000000ull
) {
69 res
.w
[1] = 0x7c00000000000000ull
;
70 #ifdef SET_STATUS_FLAGS
71 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
80 ((((UINT64
) (exponent_x
+ DECIMAL_EXPONENT_BIAS_128
)) >> 1) << 49);
85 res
.w
[1] = 0x7c00000000000000ull
;
87 #ifdef SET_STATUS_FLAGS
88 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
92 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
93 (void) fegetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
99 fx
.d
= (float) CX
.w
[1] * f64
.d
+ (float) CX
.w
[0];
100 bin_expon_cx
= ((fx
.i
>> 23) & 0xff) - 0x7f;
101 digits
= estimate_decimal_digits
[bin_expon_cx
];
104 if (exponent_x
& 1) {
105 A10
.w
[1] = (CX
.w
[1] << 3) | (CX
.w
[0] >> 61);
106 A10
.w
[0] = CX
.w
[0] << 3;
107 CX2
.w
[1] = (CX
.w
[1] << 1) | (CX
.w
[0] >> 63);
108 CX2
.w
[0] = CX
.w
[0] << 1;
109 __add_128_128 (A10
, A10
, CX2
);
112 CS
.w
[0] = short_sqrt128 (A10
);
114 // check for exact result
115 if (CS
.w
[0] * CS
.w
[0] == A10
.w
[0]) {
116 __mul_64x64_to_128_fast (S2
, CS
.w
[0], CS
.w
[0]);
117 if (S2
.w
[1] == A10
.w
[1]) // && S2.w[0]==A10.w[0])
119 get_BID128_very_fast (&res
, 0,
121 DECIMAL_EXPONENT_BIAS_128
) >> 1, CS
);
122 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
123 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
128 // get number of digits in CX
129 D
= CX
.w
[1] - power10_index_binexp_128
[bin_expon_cx
].w
[1];
131 || (!D
&& CX
.w
[0] >= power10_index_binexp_128
[bin_expon_cx
].w
[0]))
134 // if exponent is odd, scale coefficient by 10
136 exponent_q
= exponent_x
- scale
;
137 scale
+= (exponent_q
& 1); // exp. bias is even
140 T128
= power10_table_128
[scale
- 37];
141 __mul_128x128_low (CX1
, CX
, T128
);
143 TP128
= power10_table_128
[37];
144 __mul_128x128_to_256 (C256
, CX1
, TP128
);
146 T128
= power10_table_128
[scale
];
147 __mul_128x128_to_256 (C256
, CX
, T128
);
152 C4
.w
[3] = (C256
.w
[3] << 2) | (C256
.w
[2] >> 62);
153 C4
.w
[2] = (C256
.w
[2] << 2) | (C256
.w
[1] >> 62);
154 C4
.w
[1] = (C256
.w
[1] << 2) | (C256
.w
[0] >> 62);
155 C4
.w
[0] = C256
.w
[0] << 2;
157 long_sqrt128 (&CS
, C256
);
159 #ifndef IEEE_ROUND_NEAREST
160 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
161 if (!((rnd_mode
) & 3)) {
164 // compare to midpoints
165 CSM
.w
[1] = (CS
.w
[1] << 1) | (CS
.w
[0] >> 63);
166 CSM
.w
[0] = (CS
.w
[0] + CS
.w
[0]) | 1;
168 //__mul_128x128_to_256(M256, CSM, CSM);
169 __sqr128_to_256 (M256
, CSM
);
171 if (C4
.w
[3] > M256
.w
[3]
172 || (C4
.w
[3] == M256
.w
[3]
173 && (C4
.w
[2] > M256
.w
[2]
174 || (C4
.w
[2] == M256
.w
[2]
175 && (C4
.w
[1] > M256
.w
[1]
176 || (C4
.w
[1] == M256
.w
[1]
177 && C4
.w
[0] > M256
.w
[0])))))) {
183 C8
.w
[1] = (CS
.w
[1] << 3) | (CS
.w
[0] >> 61);
184 C8
.w
[0] = CS
.w
[0] << 3;
186 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
187 __sub_borrow_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
188 __sub_borrow_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
189 M256
.w
[3] = M256
.w
[3] - Carry
;
191 // if CSM' > C256, round up
192 if (M256
.w
[3] > C4
.w
[3]
193 || (M256
.w
[3] == C4
.w
[3]
194 && (M256
.w
[2] > C4
.w
[2]
195 || (M256
.w
[2] == C4
.w
[2]
196 && (M256
.w
[1] > C4
.w
[1]
197 || (M256
.w
[1] == C4
.w
[1]
198 && M256
.w
[0] > C4
.w
[0])))))) {
205 #ifndef IEEE_ROUND_NEAREST
206 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
208 __sqr128_to_256 (M256
, CS
);
209 C8
.w
[1] = (CS
.w
[1] << 1) | (CS
.w
[0] >> 63);
210 C8
.w
[0] = CS
.w
[0] << 1;
211 if (M256
.w
[3] > C256
.w
[3]
212 || (M256
.w
[3] == C256
.w
[3]
213 && (M256
.w
[2] > C256
.w
[2]
214 || (M256
.w
[2] == C256
.w
[2]
215 && (M256
.w
[1] > C256
.w
[1]
216 || (M256
.w
[1] == C256
.w
[1]
217 && M256
.w
[0] > C256
.w
[0])))))) {
218 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
219 __sub_borrow_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
220 __sub_borrow_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
221 M256
.w
[3] = M256
.w
[3] - Carry
;
236 if (M256
.w
[3] > C256
.w
[3]
237 || (M256
.w
[3] == C256
.w
[3]
238 && (M256
.w
[2] > C256
.w
[2]
239 || (M256
.w
[2] == C256
.w
[2]
240 && (M256
.w
[1] > C256
.w
[1]
241 || (M256
.w
[1] == C256
.w
[1]
242 && M256
.w
[0] > C256
.w
[0])))))) {
251 __add_carry_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
252 __add_carry_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
253 __add_carry_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
254 M256
.w
[3] = M256
.w
[3] + Carry
;
264 if (M256
.w
[3] < C256
.w
[3]
265 || (M256
.w
[3] == C256
.w
[3]
266 && (M256
.w
[2] < C256
.w
[2]
267 || (M256
.w
[2] == C256
.w
[2]
268 && (M256
.w
[1] < C256
.w
[1]
269 || (M256
.w
[1] == C256
.w
[1]
270 && M256
.w
[0] <= C256
.w
[0])))))) {
278 if ((rnd_mode
) == ROUNDING_UP
) {
288 #ifdef SET_STATUS_FLAGS
289 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
291 get_BID128_fast (&res
, 0,
292 (exponent_q
+ DECIMAL_EXPONENT_BIAS_128
) >> 1, CS
);
293 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
294 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
301 BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt
, UINT64
, x
)
303 UINT256 M256
, C256
, C4
, C8
;
304 UINT128 CX
, CX1
, CX2
, A10
, S2
, T128
, TP128
, CS
, CSM
, res
;
305 UINT64 sign_x
, Carry
;
308 int exponent_x
, bin_expon_cx
;
309 int digits
, scale
, exponent_q
;
310 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
311 fexcept_t binaryflags
= 0;
314 // unpack arguments, check for NaN or Infinity
315 // unpack arguments, check for NaN or Infinity
317 if (!unpack_BID64 (&sign_x
, &exponent_x
, &CX
.w
[0], x
)) {
321 if ((x
& 0x7c00000000000000ull
) == 0x7c00000000000000ull
) {
322 #ifdef SET_STATUS_FLAGS
323 if ((x
& SNAN_MASK64
) == SNAN_MASK64
) // sNaN
324 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
326 res
.w
[0] = (CX
.w
[0] & 0x0003ffffffffffffull
);
327 __mul_64x64_to_128 (res
, res
.w
[0], power10_table_128
[18].w
[0]);
328 res
.w
[1] |= ((CX
.w
[0]) & 0xfc00000000000000ull
);
332 if ((x
& 0x7800000000000000ull
) == 0x7800000000000000ull
) {
335 res
.w
[1] = 0x7c00000000000000ull
;
336 #ifdef SET_STATUS_FLAGS
337 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
345 exponent_x
- DECIMAL_EXPONENT_BIAS
+ DECIMAL_EXPONENT_BIAS_128
;
347 sign_x
| ((((UINT64
) (exponent_x
+ DECIMAL_EXPONENT_BIAS_128
)) >> 1)
353 res
.w
[1] = 0x7c00000000000000ull
;
355 #ifdef SET_STATUS_FLAGS
356 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
360 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
361 (void) fegetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
364 exponent_x
- DECIMAL_EXPONENT_BIAS
+ DECIMAL_EXPONENT_BIAS_128
;
370 fx
.d
= (float) CX
.w
[1] * f64
.d
+ (float) CX
.w
[0];
371 bin_expon_cx
= ((fx
.i
>> 23) & 0xff) - 0x7f;
372 digits
= estimate_decimal_digits
[bin_expon_cx
];
375 if (exponent_x
& 1) {
376 A10
.w
[1] = (CX
.w
[1] << 3) | (CX
.w
[0] >> 61);
377 A10
.w
[0] = CX
.w
[0] << 3;
378 CX2
.w
[1] = (CX
.w
[1] << 1) | (CX
.w
[0] >> 63);
379 CX2
.w
[0] = CX
.w
[0] << 1;
380 __add_128_128 (A10
, A10
, CX2
);
383 CS
.w
[0] = short_sqrt128 (A10
);
385 // check for exact result
386 if (CS
.w
[0] * CS
.w
[0] == A10
.w
[0]) {
387 __mul_64x64_to_128_fast (S2
, CS
.w
[0], CS
.w
[0]);
388 if (S2
.w
[1] == A10
.w
[1]) {
389 get_BID128_very_fast (&res
, 0,
390 (exponent_x
+ DECIMAL_EXPONENT_BIAS_128
) >> 1,
392 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
393 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
398 // get number of digits in CX
399 D
= CX
.w
[1] - power10_index_binexp_128
[bin_expon_cx
].w
[1];
401 || (!D
&& CX
.w
[0] >= power10_index_binexp_128
[bin_expon_cx
].w
[0]))
404 // if exponent is odd, scale coefficient by 10
406 exponent_q
= exponent_x
- scale
;
407 scale
+= (exponent_q
& 1); // exp. bias is even
410 T128
= power10_table_128
[scale
- 37];
411 __mul_128x128_low (CX1
, CX
, T128
);
413 TP128
= power10_table_128
[37];
414 __mul_128x128_to_256 (C256
, CX1
, TP128
);
416 T128
= power10_table_128
[scale
];
417 __mul_128x128_to_256 (C256
, CX
, T128
);
422 C4
.w
[3] = (C256
.w
[3] << 2) | (C256
.w
[2] >> 62);
423 C4
.w
[2] = (C256
.w
[2] << 2) | (C256
.w
[1] >> 62);
424 C4
.w
[1] = (C256
.w
[1] << 2) | (C256
.w
[0] >> 62);
425 C4
.w
[0] = C256
.w
[0] << 2;
427 long_sqrt128 (&CS
, C256
);
429 #ifndef IEEE_ROUND_NEAREST
430 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
431 if (!((rnd_mode
) & 3)) {
434 // compare to midpoints
435 CSM
.w
[1] = (CS
.w
[1] << 1) | (CS
.w
[0] >> 63);
436 CSM
.w
[0] = (CS
.w
[0] + CS
.w
[0]) | 1;
438 //__mul_128x128_to_256(M256, CSM, CSM);
439 __sqr128_to_256 (M256
, CSM
);
441 if (C4
.w
[3] > M256
.w
[3]
442 || (C4
.w
[3] == M256
.w
[3]
443 && (C4
.w
[2] > M256
.w
[2]
444 || (C4
.w
[2] == M256
.w
[2]
445 && (C4
.w
[1] > M256
.w
[1]
446 || (C4
.w
[1] == M256
.w
[1]
447 && C4
.w
[0] > M256
.w
[0])))))) {
453 C8
.w
[1] = (CS
.w
[1] << 3) | (CS
.w
[0] >> 61);
454 C8
.w
[0] = CS
.w
[0] << 3;
456 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
457 __sub_borrow_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
458 __sub_borrow_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
459 M256
.w
[3] = M256
.w
[3] - Carry
;
461 // if CSM' > C256, round up
462 if (M256
.w
[3] > C4
.w
[3]
463 || (M256
.w
[3] == C4
.w
[3]
464 && (M256
.w
[2] > C4
.w
[2]
465 || (M256
.w
[2] == C4
.w
[2]
466 && (M256
.w
[1] > C4
.w
[1]
467 || (M256
.w
[1] == C4
.w
[1]
468 && M256
.w
[0] > C4
.w
[0])))))) {
475 #ifndef IEEE_ROUND_NEAREST
476 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
478 __sqr128_to_256 (M256
, CS
);
479 C8
.w
[1] = (CS
.w
[1] << 1) | (CS
.w
[0] >> 63);
480 C8
.w
[0] = CS
.w
[0] << 1;
481 if (M256
.w
[3] > C256
.w
[3]
482 || (M256
.w
[3] == C256
.w
[3]
483 && (M256
.w
[2] > C256
.w
[2]
484 || (M256
.w
[2] == C256
.w
[2]
485 && (M256
.w
[1] > C256
.w
[1]
486 || (M256
.w
[1] == C256
.w
[1]
487 && M256
.w
[0] > C256
.w
[0])))))) {
488 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
489 __sub_borrow_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
490 __sub_borrow_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
491 M256
.w
[3] = M256
.w
[3] - Carry
;
506 if (M256
.w
[3] > C256
.w
[3]
507 || (M256
.w
[3] == C256
.w
[3]
508 && (M256
.w
[2] > C256
.w
[2]
509 || (M256
.w
[2] == C256
.w
[2]
510 && (M256
.w
[1] > C256
.w
[1]
511 || (M256
.w
[1] == C256
.w
[1]
512 && M256
.w
[0] > C256
.w
[0])))))) {
521 __add_carry_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
522 __add_carry_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
523 __add_carry_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
524 M256
.w
[3] = M256
.w
[3] + Carry
;
534 if (M256
.w
[3] < C256
.w
[3]
535 || (M256
.w
[3] == C256
.w
[3]
536 && (M256
.w
[2] < C256
.w
[2]
537 || (M256
.w
[2] == C256
.w
[2]
538 && (M256
.w
[1] < C256
.w
[1]
539 || (M256
.w
[1] == C256
.w
[1]
540 && M256
.w
[0] <= C256
.w
[0])))))) {
548 if ((rnd_mode
) == ROUNDING_UP
) {
558 #ifdef SET_STATUS_FLAGS
559 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
561 get_BID128_fast (&res
, 0, (exponent_q
+ DECIMAL_EXPONENT_BIAS_128
) >> 1,
563 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
564 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);