4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
/*
 * Produce the NaN result for a unary operation whose operand *a is a NaN.
 * An sNaN operand raises InvalidOperation, then is either replaced by the
 * default NaN (when s->default_nan_mode is set) or silenced in place.
 * A qNaN operand is replaced by the default NaN only in default_nan_mode.
 * NOTE(review): this view elides the switch header, breaks and braces.
 */
18 static void partsN(return_nan)(FloatPartsN *a, float_status *s)
21 case float_class_snan:
/* Signaling NaN input always signals invalid operation. */
22 float_raise(float_flag_invalid, s);
23 if (s->default_nan_mode) {
24 parts_default_nan(a, s);
/* Otherwise quiet the sNaN in place, preserving its payload. */
26 parts_silence_nan(a, s);
29 case float_class_qnan:
30 if (s->default_nan_mode) {
31 parts_default_nan(a, s);
/* Any non-NaN class reaching here is a caller bug. */
35 g_assert_not_reached();
/*
 * Select which of the two NaN operands *a / *b to propagate as the result
 * of a binary operation, returning a pointer to the chosen operand.
 * Any sNaN raises InvalidOperation; default_nan_mode short-circuits to the
 * default NaN.  Otherwise the target-specific pickNaN() chooses, given a
 * tie-break ordering computed from the fraction (and sign, when the
 * fractions compare equal).  A chosen sNaN is silenced before return.
 */
39 static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
42 if (is_snan(a->cls) || is_snan(b->cls)) {
43 float_raise(float_flag_invalid, s);
46 if (s->default_nan_mode) {
47 parts_default_nan(a, s);
/* Order the operands: larger fraction wins; on equality, fall back to sign. */
49 int cmp = frac_cmp(a, b);
51 cmp = a->sign < b->sign;
/* pickNaN() is the target hook deciding a-vs-b NaN propagation. */
54 if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
57 if (is_snan(a->cls)) {
58 parts_silence_nan(a, s);
/*
 * Select the NaN result for a fused multiply-add with at least one NaN
 * among a, b, c.  ab_mask/abc_mask are class-mask unions of (a,b) and
 * (a,b,c) respectively.  Any sNaN raises InvalidOperation.  The
 * target-specific pickNaNMulAdd() chooses which operand to propagate
 * (a value of 3 presumably requests the default NaN — elided lines hide
 * the 0/1/2 cases; confirm against the full source).
 */
64 static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65 FloatPartsN *c, float_status *s,
66 int ab_mask, int abc_mask)
70 if (unlikely(abc_mask & float_cmask_snan)) {
71 float_raise(float_flag_invalid, s);
/* Target hook; told whether (a,b) is the inf*0 pair for invalid handling. */
74 which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75 ab_mask == float_cmask_infzero, s);
77 if (s->default_nan_mode || which == 3) {
79 * Note that this check is after pickNaNMulAdd so that function
80 * has an opportunity to set the Invalid flag for infzero.
82 parts_default_nan(a, s);
96 g_assert_not_reached();
/* Quiet the selected operand if it was a signaling NaN. */
98 if (is_snan(a->cls)) {
99 parts_silence_nan(a, s);
105 * Canonicalize the FloatParts structure. Determine the class,
106 * unbias the exponent, and normalize the fraction.
/*
 * On entry p holds the raw biased exponent and packed fraction; on exit
 * p->cls is one of zero/normal/inf/snan/qnan, p->exp is unbiased, and a
 * normal fraction has DECOMPOSED_IMPLICIT_BIT set in frac_hi.
 */
108 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
/* Biased exponent 0: zero or denormal input. */
111 if (unlikely(p->exp == 0)) {
112 if (likely(frac_eqz(p))) {
113 p->cls = float_class_zero;
114 } else if (status->flush_inputs_to_zero) {
/* Denormal squashed to zero; flag it as an input denormal. */
115 float_raise(float_flag_input_denormal, status);
116 p->cls = float_class_zero;
/* Denormal: normalize the fraction and adjust the exponent to match. */
119 int shift = frac_normalize(p);
120 p->cls = float_class_normal;
121 p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
/* arm_althp has no inf/NaN encodings, so max exponent is still normal. */
123 } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
124 p->cls = float_class_normal;
125 p->exp -= fmt->exp_bias;
126 frac_shl(p, fmt->frac_shift);
127 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
/* Max biased exponent: infinity if the fraction is zero, else a NaN. */
128 } else if (likely(frac_eqz(p))) {
129 p->cls = float_class_inf;
131 frac_shl(p, fmt->frac_shift);
/* Distinguish sNaN from qNaN via the target-defined quiet-bit convention. */
132 p->cls = (parts_is_snan_frac(p->frac_hi, status)
133 ? float_class_snan : float_class_qnan);
138 * Round and uncanonicalize a floating-point number by parts. There
139 * are FRAC_SHIFT bits that may require rounding at the bottom of the
140 * fraction; these bits will be removed. The exponent will be biased
141 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
/*
 * Handles the non-normal classes first, then computes the rounding
 * increment for the current rounding mode, rounds, and deals with
 * overflow (including the arm_althp saturating format) and with
 * subnormal results including tininess detection and flush-to-zero.
 * NOTE(review): many break/brace lines are elided in this view.
 */
143 static void partsN(uncanon)(FloatPartsN *p, float_status *s,
/* Cache the per-format rounding constants. */
146 const int exp_max = fmt->exp_max;
147 const int frac_shift = fmt->frac_shift;
148 const uint64_t frac_lsb = fmt->frac_lsb;
149 const uint64_t frac_lsbm1 = fmt->frac_lsbm1;
150 const uint64_t round_mask = fmt->round_mask;
151 const uint64_t roundeven_mask = fmt->roundeven_mask;
/* Non-normal classes need no rounding, only repacking. */
156 if (unlikely(p->cls != float_class_normal)) {
158 case float_class_zero:
162 case float_class_inf:
/* arm_althp cannot encode infinity. */
163 g_assert(!fmt->arm_althp);
164 p->exp = fmt->exp_max;
167 case float_class_qnan:
168 case float_class_snan:
/* arm_althp cannot encode NaN either. */
169 g_assert(!fmt->arm_althp);
170 p->exp = fmt->exp_max;
171 frac_shr(p, fmt->frac_shift);
176 g_assert_not_reached();
/*
 * Compute the rounding increment 'inc' and whether overflow should
 * produce the maximum normal (overflow_norm) rather than infinity.
 */
179 switch (s->float_rounding_mode) {
180 case float_round_nearest_even:
181 overflow_norm = false;
/* Ties-to-even: no increment exactly on the halfway pattern. */
182 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
184 case float_round_ties_away:
185 overflow_norm = false;
188 case float_round_to_zero:
189 overflow_norm = true;
/* round-up: increment only positive values; negatives truncate. */
193 inc = p->sign ? 0 : round_mask;
194 overflow_norm = p->sign;
196 case float_round_down:
197 inc = p->sign ? round_mask : 0;
198 overflow_norm = !p->sign;
200 case float_round_to_odd:
201 overflow_norm = true;
/* Round-to-odd: force the lsb odd when any discarded bit is set. */
202 inc = p->frac_lo & frac_lsb ? 0 : round_mask;
205 g_assert_not_reached();
/* Rebias the exponent; positive means a (possibly overflowing) normal. */
208 exp = p->exp + fmt->exp_bias;
209 if (likely(exp > 0)) {
210 if (p->frac_lo & round_mask) {
211 flags |= float_flag_inexact;
/* Carry out of the increment: renormalize by one bit position. */
212 if (frac_addi(p, p, inc)) {
214 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
218 frac_shr(p, frac_shift);
220 if (fmt->arm_althp) {
221 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
222 if (unlikely(exp > exp_max)) {
223 /* Overflow. Return the maximum normal. */
224 flags = float_flag_invalid;
228 } else if (unlikely(exp >= exp_max)) {
229 flags |= float_flag_overflow | float_flag_inexact;
234 p->cls = float_class_inf;
/* exp <= 0: subnormal result, optionally flushed to zero. */
239 } else if (s->flush_to_zero) {
240 flags |= float_flag_output_denormal;
241 p->cls = float_class_zero;
/* Tininess: either detected before rounding, or after via a trial add. */
245 bool is_tiny = s->tininess_before_rounding || exp < 0;
249 is_tiny = !frac_addi(&discard, p, inc);
/* Shift right into subnormal position, jamming discarded bits as sticky. */
252 frac_shrjam(p, 1 - exp);
254 if (p->frac_lo & round_mask) {
255 /* Need to recompute round-to-even/round-to-odd. */
256 switch (s->float_rounding_mode) {
257 case float_round_nearest_even:
258 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
261 case float_round_to_odd:
262 inc = p->frac_lo & frac_lsb ? 0 : round_mask;
267 flags |= float_flag_inexact;
268 frac_addi(p, p, inc);
/* Rounding may have carried up into a normal: exp becomes 1 if so. */
271 exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
272 frac_shr(p, frac_shift);
274 if (is_tiny && (flags & float_flag_inexact)) {
275 flags |= float_flag_underflow;
277 if (exp == 0 && frac_eqz(p)) {
278 p->cls = float_class_zero;
282 float_raise(flags, s);
286 * Returns the result of adding or subtracting the values of the
287 * floating-point values `a' and `b'. The operation is performed
288 * according to the IEC/IEEE Standard for Binary Floating-Point
/*
 * Returns a pointer to the FloatPartsN holding the result (usually a;
 * parts_pick_nan may select b).  'subtract' flips b's effective sign so
 * both add and sub share one path.
 */
291 static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
292 float_status *s, bool subtract)
294 bool b_sign = b->sign ^ subtract;
295 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
/* Opposite effective signs: a true subtraction of magnitudes. */
297 if (a->sign != b_sign) {
299 if (likely(ab_mask == float_cmask_normal)) {
300 if (parts_sub_normal(a, b)) {
303 /* Subtract was exact, fall through to set sign. */
304 ab_mask = float_cmask_zero;
307 if (ab_mask == float_cmask_zero) {
/* Exact zero result is -0 only in round-down mode (IEEE 754). */
308 a->sign = s->float_rounding_mode == float_round_down;
312 if (unlikely(ab_mask & float_cmask_anynan)) {
316 if (ab_mask & float_cmask_inf) {
317 if (a->cls != float_class_inf) {
321 if (b->cls != float_class_inf) {
/* Inf - Inf is invalid and yields the default NaN. */
326 float_raise(float_flag_invalid, s);
327 parts_default_nan(a, s);
/* Same effective signs: a true addition of magnitudes. */
332 if (likely(ab_mask == float_cmask_normal)) {
333 parts_add_normal(a, b);
337 if (ab_mask == float_cmask_zero) {
341 if (unlikely(ab_mask & float_cmask_anynan)) {
345 if (ab_mask & float_cmask_inf) {
346 a->cls = float_class_inf;
/* Remaining mixed zero/normal cases. */
351 if (b->cls == float_class_zero) {
352 g_assert(a->cls == float_class_normal);
356 g_assert(a->cls == float_class_zero);
357 g_assert(b->cls == float_class_normal);
363 return parts_pick_nan(a, b, s);
367 * Returns the result of multiplying the floating-point values `a' and
368 * `b'. The operation is performed according to the IEC/IEEE Standard
369 * for Binary Floating-Point Arithmetic.
/* Result sign is always XOR of the operand signs, for every class. */
371 static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
374 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
375 bool sign = a->sign ^ b->sign;
377 if (likely(ab_mask == float_cmask_normal)) {
/* Widening multiply, then narrow back with the sticky bit jammed in. */
380 frac_mulw(&tmp, a, b);
381 frac_truncjam(a, &tmp);
/* Product of two [1,2) fractions is in [1,4); +1 accounts for that. */
383 a->exp += b->exp + 1;
/* Renormalize when the product stayed below 2. */
384 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
393 /* Inf * Zero == NaN */
394 if (unlikely(ab_mask == float_cmask_infzero)) {
395 float_raise(float_flag_invalid, s);
396 parts_default_nan(a, s);
400 if (unlikely(ab_mask & float_cmask_anynan)) {
401 return parts_pick_nan(a, b, s);
404 /* Multiply by 0 or Inf */
405 if (ab_mask & float_cmask_inf) {
406 a->cls = float_class_inf;
411 g_assert(ab_mask & float_cmask_zero);
412 a->cls = float_class_zero;
418 * Returns the result of multiplying the floating-point values `a' and
419 * `b' then adding 'c', with no intermediate rounding step after the
420 * multiplication. The operation is performed according to the
421 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
422 * The flags argument allows the caller to select negation of the
423 * addend, the intermediate product, or the final result. (The
424 * difference between this and having the caller do a separate
425 * negation is that negating externally will flip the sign bit on NaNs.)
427 * Requires A and C extracted into a double-sized structure to provide the
428 * extra space for the widening multiply.
430 static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
431 FloatPartsN *c, int flags, float_status *s)
433 int ab_mask, abc_mask;
/* Double-width working copies: product and widened addend. */
434 FloatPartsW p_widen, c_widen;
436 ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
437 abc_mask = float_cmask(c->cls) | ab_mask;
440 * It is implementation-defined whether the cases of (0,inf,qnan)
441 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
442 * they return if they do), so we have to hand this information
443 * off to the target-specific pick-a-NaN routine.
445 if (unlikely(abc_mask & float_cmask_anynan)) {
446 return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
/* Apply caller-requested negations before classifying special cases. */
449 if (flags & float_muladd_negate_c) {
453 /* Compute the sign of the product into A. */
455 if (flags & float_muladd_negate_product) {
/* Special-case handling when a*b is not normal*normal. */
459 if (unlikely(ab_mask != float_cmask_normal)) {
460 if (unlikely(ab_mask == float_cmask_infzero)) {
464 if (ab_mask & float_cmask_inf) {
/* inf + (-inf) within the fma is invalid. */
465 if (c->cls == float_class_inf && a->sign != c->sign) {
471 g_assert(ab_mask & float_cmask_zero);
472 if (c->cls == float_class_normal) {
476 if (c->cls == float_class_zero) {
/* (+0) + (-0) follows the rounding-mode-dependent zero-sign rule. */
477 if (a->sign != c->sign) {
478 goto return_sub_zero;
482 g_assert(c->cls == float_class_inf);
/* Normal product plus infinite addend: result is that infinity. */
485 if (unlikely(c->cls == float_class_inf)) {
490 /* Perform the multiplication step. */
491 p_widen.sign = a->sign;
492 p_widen.exp = a->exp + b->exp + 1;
493 frac_mulw(&p_widen, a, b);
/* Renormalize the product by doubling when it is below 2. */
494 if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
495 frac_add(&p_widen, &p_widen, &p_widen);
499 /* Perform the addition step. */
500 if (c->cls != float_class_zero) {
501 /* Zero-extend C to less significant bits. */
502 frac_widen(&c_widen, c);
503 c_widen.exp = c->exp;
505 if (a->sign == c->sign) {
506 parts_add_normal(&p_widen, &c_widen);
507 } else if (!parts_sub_normal(&p_widen, &c_widen)) {
508 goto return_sub_zero;
512 /* Narrow with sticky bit, for proper rounding later. */
513 frac_truncjam(a, &p_widen);
514 a->sign = p_widen.sign;
515 a->exp = p_widen.exp;
/* Optional final adjustments requested via flags. */
518 if (flags & float_muladd_halve_result) {
522 if (flags & float_muladd_negate_result) {
/* Exact zero sum: sign depends on the rounding mode (round-down => -0). */
528 a->sign = s->float_rounding_mode == float_round_down;
530 a->cls = float_class_zero;
534 a->cls = float_class_inf;
/* Invalid-operation exit: raise and return the default NaN. */
538 float_raise(float_flag_invalid, s);
539 parts_default_nan(a, s);
544 * Returns the result of dividing the floating-point value `a' by the
545 * corresponding value `b'. The operation is performed according to
546 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
548 static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
551 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
/* Quotient sign is XOR of the operand signs for every class. */
552 bool sign = a->sign ^ b->sign;
554 if (likely(ab_mask == float_cmask_normal)) {
/* frac_div returns the exponent correction from quotient normalization. */
556 a->exp -= b->exp + frac_div(a, b);
560 /* 0/0 or Inf/Inf => NaN */
561 if (unlikely(ab_mask == float_cmask_zero) ||
562 unlikely(ab_mask == float_cmask_inf)) {
563 float_raise(float_flag_invalid, s);
564 parts_default_nan(a, s);
568 /* All the NaN cases */
569 if (unlikely(ab_mask & float_cmask_anynan)) {
570 return parts_pick_nan(a, b, s);
/* Remaining mixed cases: inf/x, 0/x, x/inf, x/0. */
576 if (a->cls == float_class_inf) {
581 if (a->cls == float_class_zero) {
586 if (b->cls == float_class_inf) {
587 a->cls = float_class_zero;
/* Finite nonzero / 0: division by zero, result is signed infinity. */
592 g_assert(b->cls == float_class_zero);
593 float_raise(float_flag_divbyzero, s);
594 a->cls = float_class_inf;
599 * Rounds the floating-point value `a' to an integer, and returns the
600 * result as a floating-point value. The operation is performed
601 * according to the IEC/IEEE Standard for Binary Floating-Point
604 * parts_round_to_int_normal is an internal helper function for
605 * normal numbers only, returning true for inexact but not directly
606 * raising float_flag_inexact.
608 static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
609 int scale, int frac_size)
611 uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
/* Clamp the scale so the exponent adjustment cannot overflow. */
614 scale = MIN(MAX(scale, -0x10000), 0x10000);
/* Magnitude below 1: the result is 0 or 1 depending on mode/fraction. */
622 case float_round_nearest_even:
626 /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
627 frac_add(&tmp, a, a);
628 /* Anything remaining means frac > 0.5. */
629 one = !frac_eqz(&tmp);
632 case float_round_ties_away:
635 case float_round_to_zero:
641 case float_round_down:
644 case float_round_to_odd:
648 g_assert_not_reached();
/* Result rounds to exactly 1 (fraction = implicit bit only) or to 0. */
654 a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
656 a->cls = float_class_zero;
/* Exponent at or above the fraction width: already integral. */
661 if (a->exp >= frac_size) {
666 if (N > 64 && a->exp < N - 64) {
668 * Rounding is not in the low word -- shift lsb to bit 2,
669 * which leaves room for sticky and rounding bit.
671 shift_adj = (N - 1) - (a->exp + 2);
672 frac_shrjam(a, shift_adj);
/* Build the lsb/round masks for the current integer position. */
676 frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
679 frac_lsbm1 = frac_lsb >> 1;
680 rnd_mask = frac_lsb - 1;
681 rnd_even_mask = rnd_mask | frac_lsb;
683 if (!(a->frac_lo & rnd_mask)) {
684 /* Fractional bits already clear, undo the shift above. */
685 frac_shl(a, shift_adj);
/* Choose the increment for the rounding mode, as in uncanon. */
690 case float_round_nearest_even:
691 inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
693 case float_round_ties_away:
696 case float_round_to_zero:
700 inc = a->sign ? 0 : rnd_mask;
702 case float_round_down:
703 inc = a->sign ? rnd_mask : 0;
705 case float_round_to_odd:
706 inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
709 g_assert_not_reached();
712 if (shift_adj == 0) {
/* Carry out of the increment renormalizes by one bit position. */
713 if (frac_addi(a, a, inc)) {
715 a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
718 a->frac_lo &= ~rnd_mask;
720 frac_addi(a, a, inc);
721 a->frac_lo &= ~rnd_mask;
722 /* Be careful shifting back, not to overflow */
723 frac_shl(a, shift_adj - 1);
724 if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
/*
 * Round *a to an integral floating-point value in place, dispatching on
 * class: NaNs go through parts_return_nan, zero/inf are already integral,
 * and normals use parts_round_to_int_normal, raising inexact here when
 * that helper reports it.
 */
733 static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
734 int scale, float_status *s,
738 case float_class_qnan:
739 case float_class_snan:
740 parts_return_nan(a, s);
742 case float_class_zero:
743 case float_class_inf:
745 case float_class_normal:
746 if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
747 float_raise(float_flag_inexact, s);
751 g_assert_not_reached();
756 * Returns the result of converting the floating-point value `a' to
757 * the two's complement integer format. The conversion is performed
758 * according to the IEC/IEEE Standard for Binary Floating-Point
759 * Arithmetic---which means in particular that the conversion is
760 * rounded according to the current rounding mode. If `a' is a NaN,
761 * the largest positive integer is returned. Otherwise, if the
762 * conversion overflows, the largest integer with the same sign as `a'
765 static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
766 int scale, int64_t min, int64_t max,
/* NaN converts to 'max' per the comment above and raises invalid. */
773 case float_class_snan:
774 case float_class_qnan:
775 flags = float_flag_invalid;
779 case float_class_inf:
780 flags = float_flag_invalid;
781 r = p->sign ? min : max;
784 case float_class_zero:
787 case float_class_normal:
788 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
789 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
790 flags = float_flag_inexact;
/* Extract the integer value from the decomposed fraction. */
793 if (p->exp <= DECOMPOSED_BINARY_POINT) {
794 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
/* Range-check against [min, max]; saturate and flag invalid on overflow. */
799 if (r <= -(uint64_t)min) {
802 flags = float_flag_invalid;
805 } else if (r > max) {
806 flags = float_flag_invalid;
812 g_assert_not_reached();
815 float_raise(flags, s);