Merge -r 127928:132243 from trunk
[official-gcc.git] / libgcc / config / libbid / bid128_sqrt.c
blob0677c39e95175576cb002480369af337d3b15f34
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 for more details.
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA. */
29 #define BID_128RES
30 #include "bid_internal.h"
31 #include "bid_sqrt_macros.h"
32 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
33 #include <fenv.h>
35 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
36 #endif
38 BID128_FUNCTION_ARG1 (bid128_sqrt, x)
40 UINT256 M256, C256, C4, C8;
41 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
42 UINT64 sign_x, Carry;
43 SINT64 D;
44 int_float fx, f64;
45 int exponent_x, bin_expon_cx;
46 int digits, scale, exponent_q;
47 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
48 fexcept_t binaryflags = 0;
49 #endif
51 // unpack arguments, check for NaN or Infinity
52 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
53 res.w[1] = CX.w[1];
54 res.w[0] = CX.w[0];
55 // NaN ?
56 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
57 #ifdef SET_STATUS_FLAGS
58 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
59 __set_status_flags (pfpsf, INVALID_EXCEPTION);
60 #endif
61 res.w[1] = CX.w[1] & QUIET_MASK64;
62 BID_RETURN (res);
64 // x is Infinity?
65 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
66 res.w[1] = CX.w[1];
67 if (sign_x) {
68 // -Inf, return NaN
69 res.w[1] = 0x7c00000000000000ull;
70 #ifdef SET_STATUS_FLAGS
71 __set_status_flags (pfpsf, INVALID_EXCEPTION);
72 #endif
74 BID_RETURN (res);
76 // x is 0 otherwise
78 res.w[1] =
79 sign_x |
80 ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49);
81 res.w[0] = 0;
82 BID_RETURN (res);
84 if (sign_x) {
85 res.w[1] = 0x7c00000000000000ull;
86 res.w[0] = 0;
87 #ifdef SET_STATUS_FLAGS
88 __set_status_flags (pfpsf, INVALID_EXCEPTION);
89 #endif
90 BID_RETURN (res);
92 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
93 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
94 #endif
95 // 2^64
96 f64.i = 0x5f800000;
98 // fx ~ CX
99 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
100 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
101 digits = estimate_decimal_digits[bin_expon_cx];
103 A10 = CX;
104 if (exponent_x & 1) {
105 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
106 A10.w[0] = CX.w[0] << 3;
107 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
108 CX2.w[0] = CX.w[0] << 1;
109 __add_128_128 (A10, A10, CX2);
112 CS.w[0] = short_sqrt128 (A10);
113 CS.w[1] = 0;
114 // check for exact result
115 if (CS.w[0] * CS.w[0] == A10.w[0]) {
116 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
117 if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0])
119 get_BID128_very_fast (&res, 0,
120 (exponent_x +
121 DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
122 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
123 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
124 #endif
125 BID_RETURN (res);
128 // get number of digits in CX
129 D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
130 if (D > 0
131 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
132 digits++;
134 // if exponent is odd, scale coefficient by 10
135 scale = 67 - digits;
136 exponent_q = exponent_x - scale;
137 scale += (exponent_q & 1); // exp. bias is even
139 if (scale > 38) {
140 T128 = power10_table_128[scale - 37];
141 __mul_128x128_low (CX1, CX, T128);
143 TP128 = power10_table_128[37];
144 __mul_128x128_to_256 (C256, CX1, TP128);
145 } else {
146 T128 = power10_table_128[scale];
147 __mul_128x128_to_256 (C256, CX, T128);
151 // 4*C256
152 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
153 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
154 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
155 C4.w[0] = C256.w[0] << 2;
157 long_sqrt128 (&CS, C256);
159 #ifndef IEEE_ROUND_NEAREST
160 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
161 if (!((rnd_mode) & 3)) {
162 #endif
163 #endif
164 // compare to midpoints
165 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
166 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
167 // CSM^2
168 //__mul_128x128_to_256(M256, CSM, CSM);
169 __sqr128_to_256 (M256, CSM);
171 if (C4.w[3] > M256.w[3]
172 || (C4.w[3] == M256.w[3]
173 && (C4.w[2] > M256.w[2]
174 || (C4.w[2] == M256.w[2]
175 && (C4.w[1] > M256.w[1]
176 || (C4.w[1] == M256.w[1]
177 && C4.w[0] > M256.w[0])))))) {
178 // round up
179 CS.w[0]++;
180 if (!CS.w[0])
181 CS.w[1]++;
182 } else {
183 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
184 C8.w[0] = CS.w[0] << 3;
185 // M256 - 8*CSM
186 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
187 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
188 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
189 M256.w[3] = M256.w[3] - Carry;
191 // if CSM' > C256, round up
192 if (M256.w[3] > C4.w[3]
193 || (M256.w[3] == C4.w[3]
194 && (M256.w[2] > C4.w[2]
195 || (M256.w[2] == C4.w[2]
196 && (M256.w[1] > C4.w[1]
197 || (M256.w[1] == C4.w[1]
198 && M256.w[0] > C4.w[0])))))) {
199 // round down
200 if (!CS.w[0])
201 CS.w[1]--;
202 CS.w[0]--;
205 #ifndef IEEE_ROUND_NEAREST
206 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
207 } else {
208 __sqr128_to_256 (M256, CS);
209 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
210 C8.w[0] = CS.w[0] << 1;
211 if (M256.w[3] > C256.w[3]
212 || (M256.w[3] == C256.w[3]
213 && (M256.w[2] > C256.w[2]
214 || (M256.w[2] == C256.w[2]
215 && (M256.w[1] > C256.w[1]
216 || (M256.w[1] == C256.w[1]
217 && M256.w[0] > C256.w[0])))))) {
218 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
219 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
220 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
221 M256.w[3] = M256.w[3] - Carry;
222 M256.w[0]++;
223 if (!M256.w[0]) {
224 M256.w[1]++;
225 if (!M256.w[1]) {
226 M256.w[2]++;
227 if (!M256.w[2])
228 M256.w[3]++;
232 if (!CS.w[0])
233 CS.w[1]--;
234 CS.w[0]--;
236 if (M256.w[3] > C256.w[3]
237 || (M256.w[3] == C256.w[3]
238 && (M256.w[2] > C256.w[2]
239 || (M256.w[2] == C256.w[2]
240 && (M256.w[1] > C256.w[1]
241 || (M256.w[1] == C256.w[1]
242 && M256.w[0] > C256.w[0])))))) {
244 if (!CS.w[0])
245 CS.w[1]--;
246 CS.w[0]--;
250 else {
251 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
252 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
253 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
254 M256.w[3] = M256.w[3] + Carry;
255 M256.w[0]++;
256 if (!M256.w[0]) {
257 M256.w[1]++;
258 if (!M256.w[1]) {
259 M256.w[2]++;
260 if (!M256.w[2])
261 M256.w[3]++;
264 if (M256.w[3] < C256.w[3]
265 || (M256.w[3] == C256.w[3]
266 && (M256.w[2] < C256.w[2]
267 || (M256.w[2] == C256.w[2]
268 && (M256.w[1] < C256.w[1]
269 || (M256.w[1] == C256.w[1]
270 && M256.w[0] <= C256.w[0])))))) {
272 CS.w[0]++;
273 if (!CS.w[0])
274 CS.w[1]++;
277 // RU?
278 if ((rnd_mode) == ROUNDING_UP) {
279 CS.w[0]++;
280 if (!CS.w[0])
281 CS.w[1]++;
285 #endif
286 #endif
288 #ifdef SET_STATUS_FLAGS
289 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
290 #endif
291 get_BID128_fast (&res, 0,
292 (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
293 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
294 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
295 #endif
296 BID_RETURN (res);
301 BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x)
303 UINT256 M256, C256, C4, C8;
304 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
305 UINT64 sign_x, Carry;
306 SINT64 D;
307 int_float fx, f64;
308 int exponent_x, bin_expon_cx;
309 int digits, scale, exponent_q;
310 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
311 fexcept_t binaryflags = 0;
312 #endif
314 // unpack arguments, check for NaN or Infinity
315 // unpack arguments, check for NaN or Infinity
316 CX.w[1] = 0;
317 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) {
318 res.w[1] = CX.w[0];
319 res.w[0] = 0;
320 // NaN ?
321 if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
322 #ifdef SET_STATUS_FLAGS
323 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
324 __set_status_flags (pfpsf, INVALID_EXCEPTION);
325 #endif
326 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull);
327 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
328 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull);
329 BID_RETURN (res);
331 // x is Infinity?
332 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
333 if (sign_x) {
334 // -Inf, return NaN
335 res.w[1] = 0x7c00000000000000ull;
336 #ifdef SET_STATUS_FLAGS
337 __set_status_flags (pfpsf, INVALID_EXCEPTION);
338 #endif
340 BID_RETURN (res);
342 // x is 0 otherwise
344 exponent_x =
345 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
346 res.w[1] =
347 sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1)
348 << 49);
349 res.w[0] = 0;
350 BID_RETURN (res);
352 if (sign_x) {
353 res.w[1] = 0x7c00000000000000ull;
354 res.w[0] = 0;
355 #ifdef SET_STATUS_FLAGS
356 __set_status_flags (pfpsf, INVALID_EXCEPTION);
357 #endif
358 BID_RETURN (res);
360 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
361 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
362 #endif
363 exponent_x =
364 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
366 // 2^64
367 f64.i = 0x5f800000;
369 // fx ~ CX
370 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
371 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
372 digits = estimate_decimal_digits[bin_expon_cx];
374 A10 = CX;
375 if (exponent_x & 1) {
376 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
377 A10.w[0] = CX.w[0] << 3;
378 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
379 CX2.w[0] = CX.w[0] << 1;
380 __add_128_128 (A10, A10, CX2);
383 CS.w[0] = short_sqrt128 (A10);
384 CS.w[1] = 0;
385 // check for exact result
386 if (CS.w[0] * CS.w[0] == A10.w[0]) {
387 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
388 if (S2.w[1] == A10.w[1]) {
389 get_BID128_very_fast (&res, 0,
390 (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1,
391 CS);
392 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
393 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
394 #endif
395 BID_RETURN (res);
398 // get number of digits in CX
399 D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
400 if (D > 0
401 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
402 digits++;
404 // if exponent is odd, scale coefficient by 10
405 scale = 67 - digits;
406 exponent_q = exponent_x - scale;
407 scale += (exponent_q & 1); // exp. bias is even
409 if (scale > 38) {
410 T128 = power10_table_128[scale - 37];
411 __mul_128x128_low (CX1, CX, T128);
413 TP128 = power10_table_128[37];
414 __mul_128x128_to_256 (C256, CX1, TP128);
415 } else {
416 T128 = power10_table_128[scale];
417 __mul_128x128_to_256 (C256, CX, T128);
421 // 4*C256
422 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
423 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
424 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
425 C4.w[0] = C256.w[0] << 2;
427 long_sqrt128 (&CS, C256);
429 #ifndef IEEE_ROUND_NEAREST
430 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
431 if (!((rnd_mode) & 3)) {
432 #endif
433 #endif
434 // compare to midpoints
435 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
436 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
437 // CSM^2
438 //__mul_128x128_to_256(M256, CSM, CSM);
439 __sqr128_to_256 (M256, CSM);
441 if (C4.w[3] > M256.w[3]
442 || (C4.w[3] == M256.w[3]
443 && (C4.w[2] > M256.w[2]
444 || (C4.w[2] == M256.w[2]
445 && (C4.w[1] > M256.w[1]
446 || (C4.w[1] == M256.w[1]
447 && C4.w[0] > M256.w[0])))))) {
448 // round up
449 CS.w[0]++;
450 if (!CS.w[0])
451 CS.w[1]++;
452 } else {
453 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
454 C8.w[0] = CS.w[0] << 3;
455 // M256 - 8*CSM
456 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
457 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
458 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
459 M256.w[3] = M256.w[3] - Carry;
461 // if CSM' > C256, round up
462 if (M256.w[3] > C4.w[3]
463 || (M256.w[3] == C4.w[3]
464 && (M256.w[2] > C4.w[2]
465 || (M256.w[2] == C4.w[2]
466 && (M256.w[1] > C4.w[1]
467 || (M256.w[1] == C4.w[1]
468 && M256.w[0] > C4.w[0])))))) {
469 // round down
470 if (!CS.w[0])
471 CS.w[1]--;
472 CS.w[0]--;
475 #ifndef IEEE_ROUND_NEAREST
476 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
477 } else {
478 __sqr128_to_256 (M256, CS);
479 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
480 C8.w[0] = CS.w[0] << 1;
481 if (M256.w[3] > C256.w[3]
482 || (M256.w[3] == C256.w[3]
483 && (M256.w[2] > C256.w[2]
484 || (M256.w[2] == C256.w[2]
485 && (M256.w[1] > C256.w[1]
486 || (M256.w[1] == C256.w[1]
487 && M256.w[0] > C256.w[0])))))) {
488 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
489 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
490 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
491 M256.w[3] = M256.w[3] - Carry;
492 M256.w[0]++;
493 if (!M256.w[0]) {
494 M256.w[1]++;
495 if (!M256.w[1]) {
496 M256.w[2]++;
497 if (!M256.w[2])
498 M256.w[3]++;
502 if (!CS.w[0])
503 CS.w[1]--;
504 CS.w[0]--;
506 if (M256.w[3] > C256.w[3]
507 || (M256.w[3] == C256.w[3]
508 && (M256.w[2] > C256.w[2]
509 || (M256.w[2] == C256.w[2]
510 && (M256.w[1] > C256.w[1]
511 || (M256.w[1] == C256.w[1]
512 && M256.w[0] > C256.w[0])))))) {
514 if (!CS.w[0])
515 CS.w[1]--;
516 CS.w[0]--;
520 else {
521 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
522 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
523 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
524 M256.w[3] = M256.w[3] + Carry;
525 M256.w[0]++;
526 if (!M256.w[0]) {
527 M256.w[1]++;
528 if (!M256.w[1]) {
529 M256.w[2]++;
530 if (!M256.w[2])
531 M256.w[3]++;
534 if (M256.w[3] < C256.w[3]
535 || (M256.w[3] == C256.w[3]
536 && (M256.w[2] < C256.w[2]
537 || (M256.w[2] == C256.w[2]
538 && (M256.w[1] < C256.w[1]
539 || (M256.w[1] == C256.w[1]
540 && M256.w[0] <= C256.w[0])))))) {
542 CS.w[0]++;
543 if (!CS.w[0])
544 CS.w[1]++;
547 // RU?
548 if ((rnd_mode) == ROUNDING_UP) {
549 CS.w[0]++;
550 if (!CS.w[0])
551 CS.w[1]++;
555 #endif
556 #endif
558 #ifdef SET_STATUS_FLAGS
559 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
560 #endif
561 get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1,
562 CS);
563 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
564 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
565 #endif
566 BID_RETURN (res);