1 /* Copyright (C) 2007-2023 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
25 #include "bid_internal.h"
26 #include "bid_sqrt_macros.h"
27 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
30 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
33 BID128_FUNCTION_ARG1 (bid128_sqrt
, x
)
35 UINT256 M256
, C256
, C4
, C8
;
36 UINT128 CX
, CX1
, CX2
, A10
, S2
, T128
, TP128
, CS
, CSM
, res
;
40 int exponent_x
, bin_expon_cx
;
41 int digits
, scale
, exponent_q
;
42 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
43 fexcept_t binaryflags
= 0;
46 // unpack arguments, check for NaN or Infinity
47 if (!unpack_BID128_value (&sign_x
, &exponent_x
, &CX
, x
)) {
51 if ((x
.w
[1] & 0x7c00000000000000ull
) == 0x7c00000000000000ull
) {
52 #ifdef SET_STATUS_FLAGS
53 if ((x
.w
[1] & 0x7e00000000000000ull
) == 0x7e00000000000000ull
) // sNaN
54 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
56 res
.w
[1] = CX
.w
[1] & QUIET_MASK64
;
60 if ((x
.w
[1] & 0x7800000000000000ull
) == 0x7800000000000000ull
) {
64 res
.w
[1] = 0x7c00000000000000ull
;
65 #ifdef SET_STATUS_FLAGS
66 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
75 ((((UINT64
) (exponent_x
+ DECIMAL_EXPONENT_BIAS_128
)) >> 1) << 49);
80 res
.w
[1] = 0x7c00000000000000ull
;
82 #ifdef SET_STATUS_FLAGS
83 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
87 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
88 (void) fegetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
94 fx
.d
= (float) CX
.w
[1] * f64
.d
+ (float) CX
.w
[0];
95 bin_expon_cx
= ((fx
.i
>> 23) & 0xff) - 0x7f;
96 digits
= estimate_decimal_digits
[bin_expon_cx
];
100 A10
.w
[1] = (CX
.w
[1] << 3) | (CX
.w
[0] >> 61);
101 A10
.w
[0] = CX
.w
[0] << 3;
102 CX2
.w
[1] = (CX
.w
[1] << 1) | (CX
.w
[0] >> 63);
103 CX2
.w
[0] = CX
.w
[0] << 1;
104 __add_128_128 (A10
, A10
, CX2
);
107 CS
.w
[0] = short_sqrt128 (A10
);
109 // check for exact result
110 if (CS
.w
[0] * CS
.w
[0] == A10
.w
[0]) {
111 __mul_64x64_to_128_fast (S2
, CS
.w
[0], CS
.w
[0]);
112 if (S2
.w
[1] == A10
.w
[1]) // && S2.w[0]==A10.w[0])
114 get_BID128_very_fast (&res
, 0,
116 DECIMAL_EXPONENT_BIAS_128
) >> 1, CS
);
117 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
118 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
123 // get number of digits in CX
124 D
= CX
.w
[1] - power10_index_binexp_128
[bin_expon_cx
].w
[1];
126 || (!D
&& CX
.w
[0] >= power10_index_binexp_128
[bin_expon_cx
].w
[0]))
129 // if exponent is odd, scale coefficient by 10
131 exponent_q
= exponent_x
- scale
;
132 scale
+= (exponent_q
& 1); // exp. bias is even
135 T128
= power10_table_128
[scale
- 37];
136 __mul_128x128_low (CX1
, CX
, T128
);
138 TP128
= power10_table_128
[37];
139 __mul_128x128_to_256 (C256
, CX1
, TP128
);
141 T128
= power10_table_128
[scale
];
142 __mul_128x128_to_256 (C256
, CX
, T128
);
147 C4
.w
[3] = (C256
.w
[3] << 2) | (C256
.w
[2] >> 62);
148 C4
.w
[2] = (C256
.w
[2] << 2) | (C256
.w
[1] >> 62);
149 C4
.w
[1] = (C256
.w
[1] << 2) | (C256
.w
[0] >> 62);
150 C4
.w
[0] = C256
.w
[0] << 2;
152 long_sqrt128 (&CS
, C256
);
154 #ifndef IEEE_ROUND_NEAREST
155 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
156 if (!((rnd_mode
) & 3)) {
159 // compare to midpoints
160 CSM
.w
[1] = (CS
.w
[1] << 1) | (CS
.w
[0] >> 63);
161 CSM
.w
[0] = (CS
.w
[0] + CS
.w
[0]) | 1;
163 //__mul_128x128_to_256(M256, CSM, CSM);
164 __sqr128_to_256 (M256
, CSM
);
166 if (C4
.w
[3] > M256
.w
[3]
167 || (C4
.w
[3] == M256
.w
[3]
168 && (C4
.w
[2] > M256
.w
[2]
169 || (C4
.w
[2] == M256
.w
[2]
170 && (C4
.w
[1] > M256
.w
[1]
171 || (C4
.w
[1] == M256
.w
[1]
172 && C4
.w
[0] > M256
.w
[0])))))) {
178 C8
.w
[1] = (CS
.w
[1] << 3) | (CS
.w
[0] >> 61);
179 C8
.w
[0] = CS
.w
[0] << 3;
181 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
182 __sub_borrow_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
183 __sub_borrow_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
184 M256
.w
[3] = M256
.w
[3] - Carry
;
186 // if CSM' > C256, round up
187 if (M256
.w
[3] > C4
.w
[3]
188 || (M256
.w
[3] == C4
.w
[3]
189 && (M256
.w
[2] > C4
.w
[2]
190 || (M256
.w
[2] == C4
.w
[2]
191 && (M256
.w
[1] > C4
.w
[1]
192 || (M256
.w
[1] == C4
.w
[1]
193 && M256
.w
[0] > C4
.w
[0])))))) {
200 #ifndef IEEE_ROUND_NEAREST
201 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
203 __sqr128_to_256 (M256
, CS
);
204 C8
.w
[1] = (CS
.w
[1] << 1) | (CS
.w
[0] >> 63);
205 C8
.w
[0] = CS
.w
[0] << 1;
206 if (M256
.w
[3] > C256
.w
[3]
207 || (M256
.w
[3] == C256
.w
[3]
208 && (M256
.w
[2] > C256
.w
[2]
209 || (M256
.w
[2] == C256
.w
[2]
210 && (M256
.w
[1] > C256
.w
[1]
211 || (M256
.w
[1] == C256
.w
[1]
212 && M256
.w
[0] > C256
.w
[0])))))) {
213 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
214 __sub_borrow_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
215 __sub_borrow_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
216 M256
.w
[3] = M256
.w
[3] - Carry
;
231 if (M256
.w
[3] > C256
.w
[3]
232 || (M256
.w
[3] == C256
.w
[3]
233 && (M256
.w
[2] > C256
.w
[2]
234 || (M256
.w
[2] == C256
.w
[2]
235 && (M256
.w
[1] > C256
.w
[1]
236 || (M256
.w
[1] == C256
.w
[1]
237 && M256
.w
[0] > C256
.w
[0])))))) {
246 __add_carry_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
247 __add_carry_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
248 __add_carry_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
249 M256
.w
[3] = M256
.w
[3] + Carry
;
259 if (M256
.w
[3] < C256
.w
[3]
260 || (M256
.w
[3] == C256
.w
[3]
261 && (M256
.w
[2] < C256
.w
[2]
262 || (M256
.w
[2] == C256
.w
[2]
263 && (M256
.w
[1] < C256
.w
[1]
264 || (M256
.w
[1] == C256
.w
[1]
265 && M256
.w
[0] <= C256
.w
[0])))))) {
273 if ((rnd_mode
) == ROUNDING_UP
) {
283 #ifdef SET_STATUS_FLAGS
284 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
286 get_BID128_fast (&res
, 0,
287 (exponent_q
+ DECIMAL_EXPONENT_BIAS_128
) >> 1, CS
);
288 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
289 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
296 BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt
, UINT64
, x
)
298 UINT256 M256
, C256
, C4
, C8
;
299 UINT128 CX
, CX1
, CX2
, A10
, S2
, T128
, TP128
, CS
, CSM
, res
;
300 UINT64 sign_x
, Carry
;
303 int exponent_x
, bin_expon_cx
;
304 int digits
, scale
, exponent_q
;
305 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
306 fexcept_t binaryflags
= 0;
309 // unpack arguments, check for NaN or Infinity
310 // unpack arguments, check for NaN or Infinity
312 if (!unpack_BID64 (&sign_x
, &exponent_x
, &CX
.w
[0], x
)) {
316 if ((x
& 0x7c00000000000000ull
) == 0x7c00000000000000ull
) {
317 #ifdef SET_STATUS_FLAGS
318 if ((x
& SNAN_MASK64
) == SNAN_MASK64
) // sNaN
319 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
321 res
.w
[0] = (CX
.w
[0] & 0x0003ffffffffffffull
);
322 __mul_64x64_to_128 (res
, res
.w
[0], power10_table_128
[18].w
[0]);
323 res
.w
[1] |= ((CX
.w
[0]) & 0xfc00000000000000ull
);
327 if ((x
& 0x7800000000000000ull
) == 0x7800000000000000ull
) {
330 res
.w
[1] = 0x7c00000000000000ull
;
331 #ifdef SET_STATUS_FLAGS
332 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
340 exponent_x
- DECIMAL_EXPONENT_BIAS
+ DECIMAL_EXPONENT_BIAS_128
;
342 sign_x
| ((((UINT64
) (exponent_x
+ DECIMAL_EXPONENT_BIAS_128
)) >> 1)
348 res
.w
[1] = 0x7c00000000000000ull
;
350 #ifdef SET_STATUS_FLAGS
351 __set_status_flags (pfpsf
, INVALID_EXCEPTION
);
355 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
356 (void) fegetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
359 exponent_x
- DECIMAL_EXPONENT_BIAS
+ DECIMAL_EXPONENT_BIAS_128
;
365 fx
.d
= (float) CX
.w
[1] * f64
.d
+ (float) CX
.w
[0];
366 bin_expon_cx
= ((fx
.i
>> 23) & 0xff) - 0x7f;
367 digits
= estimate_decimal_digits
[bin_expon_cx
];
370 if (exponent_x
& 1) {
371 A10
.w
[1] = (CX
.w
[1] << 3) | (CX
.w
[0] >> 61);
372 A10
.w
[0] = CX
.w
[0] << 3;
373 CX2
.w
[1] = (CX
.w
[1] << 1) | (CX
.w
[0] >> 63);
374 CX2
.w
[0] = CX
.w
[0] << 1;
375 __add_128_128 (A10
, A10
, CX2
);
378 CS
.w
[0] = short_sqrt128 (A10
);
380 // check for exact result
381 if (CS
.w
[0] * CS
.w
[0] == A10
.w
[0]) {
382 __mul_64x64_to_128_fast (S2
, CS
.w
[0], CS
.w
[0]);
383 if (S2
.w
[1] == A10
.w
[1]) {
384 get_BID128_very_fast (&res
, 0,
385 (exponent_x
+ DECIMAL_EXPONENT_BIAS_128
) >> 1,
387 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
388 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);
393 // get number of digits in CX
394 D
= CX
.w
[1] - power10_index_binexp_128
[bin_expon_cx
].w
[1];
396 || (!D
&& CX
.w
[0] >= power10_index_binexp_128
[bin_expon_cx
].w
[0]))
399 // if exponent is odd, scale coefficient by 10
401 exponent_q
= exponent_x
- scale
;
402 scale
+= (exponent_q
& 1); // exp. bias is even
405 T128
= power10_table_128
[scale
- 37];
406 __mul_128x128_low (CX1
, CX
, T128
);
408 TP128
= power10_table_128
[37];
409 __mul_128x128_to_256 (C256
, CX1
, TP128
);
411 T128
= power10_table_128
[scale
];
412 __mul_128x128_to_256 (C256
, CX
, T128
);
417 C4
.w
[3] = (C256
.w
[3] << 2) | (C256
.w
[2] >> 62);
418 C4
.w
[2] = (C256
.w
[2] << 2) | (C256
.w
[1] >> 62);
419 C4
.w
[1] = (C256
.w
[1] << 2) | (C256
.w
[0] >> 62);
420 C4
.w
[0] = C256
.w
[0] << 2;
422 long_sqrt128 (&CS
, C256
);
424 #ifndef IEEE_ROUND_NEAREST
425 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
426 if (!((rnd_mode
) & 3)) {
429 // compare to midpoints
430 CSM
.w
[1] = (CS
.w
[1] << 1) | (CS
.w
[0] >> 63);
431 CSM
.w
[0] = (CS
.w
[0] + CS
.w
[0]) | 1;
433 //__mul_128x128_to_256(M256, CSM, CSM);
434 __sqr128_to_256 (M256
, CSM
);
436 if (C4
.w
[3] > M256
.w
[3]
437 || (C4
.w
[3] == M256
.w
[3]
438 && (C4
.w
[2] > M256
.w
[2]
439 || (C4
.w
[2] == M256
.w
[2]
440 && (C4
.w
[1] > M256
.w
[1]
441 || (C4
.w
[1] == M256
.w
[1]
442 && C4
.w
[0] > M256
.w
[0])))))) {
448 C8
.w
[1] = (CS
.w
[1] << 3) | (CS
.w
[0] >> 61);
449 C8
.w
[0] = CS
.w
[0] << 3;
451 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
452 __sub_borrow_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
453 __sub_borrow_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
454 M256
.w
[3] = M256
.w
[3] - Carry
;
456 // if CSM' > C256, round up
457 if (M256
.w
[3] > C4
.w
[3]
458 || (M256
.w
[3] == C4
.w
[3]
459 && (M256
.w
[2] > C4
.w
[2]
460 || (M256
.w
[2] == C4
.w
[2]
461 && (M256
.w
[1] > C4
.w
[1]
462 || (M256
.w
[1] == C4
.w
[1]
463 && M256
.w
[0] > C4
.w
[0])))))) {
470 #ifndef IEEE_ROUND_NEAREST
471 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
473 __sqr128_to_256 (M256
, CS
);
474 C8
.w
[1] = (CS
.w
[1] << 1) | (CS
.w
[0] >> 63);
475 C8
.w
[0] = CS
.w
[0] << 1;
476 if (M256
.w
[3] > C256
.w
[3]
477 || (M256
.w
[3] == C256
.w
[3]
478 && (M256
.w
[2] > C256
.w
[2]
479 || (M256
.w
[2] == C256
.w
[2]
480 && (M256
.w
[1] > C256
.w
[1]
481 || (M256
.w
[1] == C256
.w
[1]
482 && M256
.w
[0] > C256
.w
[0])))))) {
483 __sub_borrow_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
484 __sub_borrow_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
485 __sub_borrow_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
486 M256
.w
[3] = M256
.w
[3] - Carry
;
501 if (M256
.w
[3] > C256
.w
[3]
502 || (M256
.w
[3] == C256
.w
[3]
503 && (M256
.w
[2] > C256
.w
[2]
504 || (M256
.w
[2] == C256
.w
[2]
505 && (M256
.w
[1] > C256
.w
[1]
506 || (M256
.w
[1] == C256
.w
[1]
507 && M256
.w
[0] > C256
.w
[0])))))) {
516 __add_carry_out (M256
.w
[0], Carry
, M256
.w
[0], C8
.w
[0]);
517 __add_carry_in_out (M256
.w
[1], Carry
, M256
.w
[1], C8
.w
[1], Carry
);
518 __add_carry_in_out (M256
.w
[2], Carry
, M256
.w
[2], 0, Carry
);
519 M256
.w
[3] = M256
.w
[3] + Carry
;
529 if (M256
.w
[3] < C256
.w
[3]
530 || (M256
.w
[3] == C256
.w
[3]
531 && (M256
.w
[2] < C256
.w
[2]
532 || (M256
.w
[2] == C256
.w
[2]
533 && (M256
.w
[1] < C256
.w
[1]
534 || (M256
.w
[1] == C256
.w
[1]
535 && M256
.w
[0] <= C256
.w
[0])))))) {
543 if ((rnd_mode
) == ROUNDING_UP
) {
553 #ifdef SET_STATUS_FLAGS
554 __set_status_flags (pfpsf
, INEXACT_EXCEPTION
);
556 get_BID128_fast (&res
, 0, (exponent_q
+ DECIMAL_EXPONENT_BIAS_128
) >> 1,
558 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
559 (void) fesetexceptflag (&binaryflags
, FE_ALL_FLAGS
);