2007-09-27 H.J. Lu <hongjiu.lu@intel.com>
[official-gcc.git] / libgcc / config / libbid / bid64_fma.c
blob6073d59a39bb1c2462faeb3a6189e8f44c0ba93b
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 for more details.
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA. */
29 /*****************************************************************************
30 * BID64 fma
31 *****************************************************************************
33 * Algorithm description:
35 * if multiplication is guranteed exact (short coefficients)
36 * call the unpacked arg. equivalent of bid64_add(x*y, z)
37 * else
38 * get full coefficient_x*coefficient_y product
39 * call subroutine to perform addition of 64-bit argument
40 * to 128-bit product
42 ****************************************************************************/
44 #include "bid_inline_add.h"
46 #if DECIMAL_CALL_BY_REFERENCE
47 extern void bid64_mul (UINT64 * pres, UINT64 * px,
48 UINT64 *
49 py _RND_MODE_PARAM _EXC_FLAGS_PARAM
50 _EXC_MASKS_PARAM _EXC_INFO_PARAM);
51 #else
53 extern UINT64 bid64_mul (UINT64 x,
54 UINT64 y _RND_MODE_PARAM
55 _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
56 _EXC_INFO_PARAM);
57 #endif
59 #if DECIMAL_CALL_BY_REFERENCE
61 void
62 bid64_fma (UINT64 * pres, UINT64 * px, UINT64 * py,
63 UINT64 *
64 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
65 _EXC_INFO_PARAM) {
66 UINT64 x, y, z;
67 #else
69 UINT64
70 bid64_fma (UINT64 x, UINT64 y,
71 UINT64 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
72 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
73 #endif
74 UINT128 P, PU, CT, CZ;
75 UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z,
76 coefficient_z;
77 UINT64 C64, remainder_y, res;
78 UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z;
79 int_double tempx, tempy;
80 int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy,
81 bin_expon_product, rmode;
82 int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey,
83 scale_z, uf_status;
85 #if DECIMAL_CALL_BY_REFERENCE
86 #if !DECIMAL_GLOBAL_ROUNDING
87 _IDEC_round rnd_mode = *prnd_mode;
88 #endif
89 x = *px;
90 y = *py;
91 z = *pz;
92 #endif
94 valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
95 valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
96 valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z);
98 // unpack arguments, check for NaN, Infinity, or 0
99 if (!valid_x || !valid_y || !valid_z) {
101 if ((y & MASK_NAN) == MASK_NAN) { // y is NAN
102 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
103 // check first for non-canonical NaN payload
104 y = y & 0xfe03ffffffffffffull; // clear G6-G12
105 if ((y & 0x0003ffffffffffffull) > 999999999999999ull) {
106 y = y & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
108 if ((y & MASK_SNAN) == MASK_SNAN) { // y is SNAN
109 // set invalid flag
110 *pfpsf |= INVALID_EXCEPTION;
111 // return quiet (y)
112 res = y & 0xfdffffffffffffffull;
113 } else { // y is QNaN
114 // return y
115 res = y;
116 // if z = SNaN or x = SNaN signal invalid exception
117 if ((z & MASK_SNAN) == MASK_SNAN
118 || (x & MASK_SNAN) == MASK_SNAN) {
119 // set invalid flag
120 *pfpsf |= INVALID_EXCEPTION;
123 BID_RETURN (res)
124 } else if ((z & MASK_NAN) == MASK_NAN) { // z is NAN
125 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
126 // check first for non-canonical NaN payload
127 z = z & 0xfe03ffffffffffffull; // clear G6-G12
128 if ((z & 0x0003ffffffffffffull) > 999999999999999ull) {
129 z = z & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
131 if ((z & MASK_SNAN) == MASK_SNAN) { // z is SNAN
132 // set invalid flag
133 *pfpsf |= INVALID_EXCEPTION;
134 // return quiet (z)
135 res = z & 0xfdffffffffffffffull;
136 } else { // z is QNaN
137 // return z
138 res = z;
139 // if x = SNaN signal invalid exception
140 if ((x & MASK_SNAN) == MASK_SNAN) {
141 // set invalid flag
142 *pfpsf |= INVALID_EXCEPTION;
145 BID_RETURN (res)
146 } else if ((x & MASK_NAN) == MASK_NAN) { // x is NAN
147 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
148 // check first for non-canonical NaN payload
149 x = x & 0xfe03ffffffffffffull; // clear G6-G12
150 if ((x & 0x0003ffffffffffffull) > 999999999999999ull) {
151 x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
153 if ((x & MASK_SNAN) == MASK_SNAN) { // x is SNAN
154 // set invalid flag
155 *pfpsf |= INVALID_EXCEPTION;
156 // return quiet (x)
157 res = x & 0xfdffffffffffffffull;
158 } else { // x is QNaN
159 // return x
160 res = x; // clear out G[6]-G[16]
162 BID_RETURN (res)
165 if (!valid_x) {
166 // x is Inf. or 0
168 // x is Infinity?
169 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
170 // check if y is 0
171 if (!coefficient_y) {
172 // y==0, return NaN
173 #ifdef SET_STATUS_FLAGS
174 if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull)
175 __set_status_flags (pfpsf, INVALID_EXCEPTION);
176 #endif
177 BID_RETURN (0x7c00000000000000ull);
179 // test if z is Inf of oposite sign
180 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
181 && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
182 // return NaN
183 #ifdef SET_STATUS_FLAGS
184 __set_status_flags (pfpsf, INVALID_EXCEPTION);
185 #endif
186 BID_RETURN (0x7c00000000000000ull);
188 // otherwise return +/-Inf
189 BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
190 0x7800000000000000ull);
192 // x is 0
193 if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)
194 && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
196 if (coefficient_z) {
197 exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y;
199 sign_z = z & 0x8000000000000000ull;
201 if (exponent_y >= exponent_z)
202 BID_RETURN (z);
203 res =
204 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
205 &rnd_mode, pfpsf);
206 BID_RETURN (res);
210 if (!valid_y) {
211 // y is Inf. or 0
213 // y is Infinity?
214 if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) {
215 // check if x is 0
216 if (!coefficient_x) {
217 // y==0, return NaN
218 #ifdef SET_STATUS_FLAGS
219 __set_status_flags (pfpsf, INVALID_EXCEPTION);
220 #endif
221 BID_RETURN (0x7c00000000000000ull);
223 // test if z is Inf of oposite sign
224 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
225 && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
226 #ifdef SET_STATUS_FLAGS
227 __set_status_flags (pfpsf, INVALID_EXCEPTION);
228 #endif
229 // return NaN
230 BID_RETURN (0x7c00000000000000ull);
232 // otherwise return +/-Inf
233 BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
234 0x7800000000000000ull);
236 // y is 0
237 if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
239 if (coefficient_z) {
240 exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS;
242 sign_z = z & 0x8000000000000000ull;
244 if (exponent_y >= exponent_z)
245 BID_RETURN (z);
246 res =
247 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
248 &rnd_mode, pfpsf);
249 BID_RETURN (res);
254 if (!valid_z) {
255 // y is Inf. or 0
257 // test if y is NaN/Inf
258 if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) {
259 BID_RETURN (coefficient_z & QUIET_MASK64);
261 // z is 0, return x*y
262 if ((!coefficient_x) || (!coefficient_y)) {
263 //0+/-0
264 exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS;
265 if (exponent_x > DECIMAL_MAX_EXPON_64)
266 exponent_x = DECIMAL_MAX_EXPON_64;
267 else if (exponent_x < 0)
268 exponent_x = 0;
269 if (exponent_x <= exponent_z)
270 res = ((UINT64) exponent_x) << 53;
271 else
272 res = ((UINT64) exponent_z) << 53;
273 if ((sign_x ^ sign_y) == sign_z)
274 res |= sign_z;
275 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
276 #ifndef IEEE_ROUND_NEAREST
277 else if (rnd_mode == ROUNDING_DOWN)
278 res |= 0x8000000000000000ull;
279 #endif
280 #endif
281 BID_RETURN (res);
286 /* get binary coefficients of x and y */
288 //--- get number of bits in the coefficients of x and y ---
289 // version 2 (original)
290 tempx.d = (double) coefficient_x;
291 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52);
293 tempy.d = (double) coefficient_y;
294 bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52);
296 // magnitude estimate for coefficient_x*coefficient_y is
297 // 2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx)
298 bin_expon_product = bin_expon_cx + bin_expon_cy;
300 // check if coefficient_x*coefficient_y<2^(10*k+3)
301 // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1
302 if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) {
303 // easy multiply
304 C64 = coefficient_x * coefficient_y;
305 final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS;
306 if ((final_exponent > 0) || (!coefficient_z)) {
307 res =
308 get_add64 (sign_x ^ sign_y,
309 final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf);
310 BID_RETURN (res);
311 } else {
312 P.w[0] = C64;
313 P.w[1] = 0;
314 extra_digits = 0;
316 } else {
317 if (!coefficient_z) {
318 #if DECIMAL_CALL_BY_REFERENCE
319 bid64_mul (&res, px,
320 py _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
321 _EXC_INFO_ARG);
322 #else
323 res =
324 bid64_mul (x,
325 y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
326 _EXC_INFO_ARG);
327 #endif
328 BID_RETURN (res);
330 // get 128-bit product: coefficient_x*coefficient_y
331 __mul_64x64_to_128 (P, coefficient_x, coefficient_y);
333 // tighten binary range of P: leading bit is 2^bp
334 // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1
335 bin_expon_product -= 2 * BINARY_EXPONENT_BIAS;
336 __tight_bin_range_128 (bp, P, bin_expon_product);
338 // get number of decimal digits in the product
339 digits_p = estimate_decimal_digits[bp];
340 if (!(__unsigned_compare_gt_128 (power10_table_128[digits_p], P)))
341 digits_p++; // if power10_table_128[digits_p] <= P
343 // determine number of decimal digits to be rounded out
344 extra_digits = digits_p - MAX_FORMAT_DIGITS;
345 final_exponent =
346 exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS;
349 if (((unsigned) final_exponent) >= 3 * 256) {
350 if (final_exponent < 0) {
351 //--- get number of bits in the coefficients of z ---
352 tempx.d = (double) coefficient_z;
353 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
354 // get number of decimal digits in the coeff_x
355 digits_z = estimate_decimal_digits[bin_expon_cx];
356 if (coefficient_z >= power10_table_128[digits_z].w[0])
357 digits_z++;
358 // underflow
359 if ((final_exponent + 16 < 0)
360 || (exponent_z + digits_z > 33 + final_exponent)) {
361 res =
362 BID_normalize (sign_z, exponent_z, coefficient_z,
363 sign_x ^ sign_y, 1, rnd_mode, pfpsf);
364 BID_RETURN (res);
367 ez = exponent_z + digits_z - 16;
368 if (ez < 0)
369 ez = 0;
370 scale_z = exponent_z - ez;
371 coefficient_z *= power10_table_128[scale_z].w[0];
372 ey = final_exponent - extra_digits;
373 extra_digits = ez - ey;
374 if (extra_digits > 33) {
375 res =
376 BID_normalize (sign_z, exponent_z, coefficient_z,
377 sign_x ^ sign_y, 1, rnd_mode, pfpsf);
378 BID_RETURN (res);
380 //else // extra_digits<=32
382 if (extra_digits > 17) {
383 CYh = __truncate (P, 16);
384 // get remainder
385 T = power10_table_128[16].w[0];
386 __mul_64x64_to_64 (CY0L, CYh, T);
387 remainder_y = P.w[0] - CY0L;
389 extra_digits -= 16;
390 P.w[0] = CYh;
391 P.w[1] = 0;
392 } else
393 remainder_y = 0;
395 // align coeff_x, CYh
396 __mul_64x64_to_128 (CZ, coefficient_z,
397 power10_table_128[extra_digits].w[0]);
399 if (sign_z == (sign_y ^ sign_x)) {
400 __add_128_128 (CT, CZ, P);
401 if (__unsigned_compare_ge_128
402 (CT, power10_table_128[16 + extra_digits])) {
403 extra_digits++;
404 ez++;
406 } else {
407 if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) {
408 P.w[0]++;
409 if (!P.w[0])
410 P.w[1]++;
412 __sub_128_128 (CT, CZ, P);
413 if (((SINT64) CT.w[1]) < 0) {
414 sign_z = sign_y ^ sign_x;
415 CT.w[0] = 0 - CT.w[0];
416 CT.w[1] = 0 - CT.w[1];
417 if (CT.w[0])
418 CT.w[1]--;
419 } else if(!(CT.w[1]|CT.w[0]))
420 sign_z = (rnd_mode!=ROUNDING_DOWN)? 0: 0x8000000000000000ull;
421 if (ez
423 (__unsigned_compare_gt_128
424 (power10_table_128[15 + extra_digits], CT))) {
425 extra_digits--;
426 ez--;
430 #ifdef SET_STATUS_FLAGS
431 uf_status = 0;
432 if ((!ez)
434 __unsigned_compare_gt_128 (power10_table_128
435 [extra_digits + 15], CT)) {
436 rmode = rnd_mode;
437 if (sign_z && (unsigned) (rmode - 1) < 2)
438 rmode = 3 - rmode;
439 //__add_128_64(PU, CT, round_const_table[rmode][extra_digits]);
440 PU = power10_table_128[extra_digits + 15];
441 PU.w[0]--;
442 if (__unsigned_compare_gt_128 (PU, CT)
443 || (rmode == ROUNDING_DOWN)
444 || (rmode == ROUNDING_TO_ZERO))
445 uf_status = UNDERFLOW_EXCEPTION;
446 else if (extra_digits < 2) {
447 if ((rmode == ROUNDING_UP)) {
448 if (!extra_digits)
449 uf_status = UNDERFLOW_EXCEPTION;
450 else {
451 if (remainder_y && (sign_z != (sign_y ^ sign_x)))
452 remainder_y = power10_table_128[16].w[0] - remainder_y;
454 if (power10_table_128[15].w[0] > remainder_y)
455 uf_status = UNDERFLOW_EXCEPTION;
457 } else // RN or RN_away
459 if (remainder_y && (sign_z != (sign_y ^ sign_x)))
460 remainder_y = power10_table_128[16].w[0] - remainder_y;
462 if (!extra_digits) {
463 remainder_y += round_const_table[rmode][15];
464 if (remainder_y < power10_table_128[16].w[0])
465 uf_status = UNDERFLOW_EXCEPTION;
466 } else {
467 if (remainder_y < round_const_table[rmode][16])
468 uf_status = UNDERFLOW_EXCEPTION;
471 //__set_status_flags (pfpsf, uf_status);
474 #endif
475 res =
476 __bid_full_round64_remainder (sign_z, ez - extra_digits, CT,
477 extra_digits, remainder_y,
478 rnd_mode, pfpsf, uf_status);
479 BID_RETURN (res);
481 } else {
482 if ((sign_z == (sign_x ^ sign_y))
483 || (final_exponent > 3 * 256 + 15)) {
484 res =
485 fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent,
486 1000000000000000ull, rnd_mode,
487 pfpsf);
488 BID_RETURN (res);
494 if (extra_digits > 0) {
495 res =
496 get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y,
497 final_exponent, P, extra_digits, rnd_mode, pfpsf);
498 BID_RETURN (res);
500 // go to convert_format and exit
501 else {
502 C64 = __low_64 (P);
504 res =
505 get_add64 (sign_x ^ sign_y,
506 exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64,
507 sign_z, exponent_z, coefficient_z,
508 rnd_mode, pfpsf);
509 BID_RETURN (res);