4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
/*
 * Prepare a NaN held in *a for return to the caller.
 *
 * a: parts updated in place; the visible case labels cover
 *    float_class_snan and float_class_qnan, and g_assert_not_reached()
 *    presumably guards every other class.
 * s: float status; default_nan_mode is read and float_flag_invalid may
 *    be raised.
 *
 * NOTE(review): this listing omits short lines (braces, else, break and
 * the switch header), so the nesting described here is inferred --
 * confirm against the complete source.
 */
18 static void partsN(return_nan)(FloatPartsN *a, float_status *s)
21 case float_class_snan:
/* A signaling NaN raises Invalid before it is replaced or silenced. */
22 float_raise(float_flag_invalid, s);
23 if (s->default_nan_mode) {
24 parts_default_nan(a, s);
/* Presumably the non-default-NaN alternative (joining line not shown). */
26 parts_silence_nan(a, s);
29 case float_class_qnan:
/* No flag is raised in the visible quiet-NaN lines. */
30 if (s->default_nan_mode) {
31 parts_default_nan(a, s);
35 g_assert_not_reached();
/*
 * Choose the NaN result for a two-operand operation.
 *
 * a, b: operand parts; their classes, signs and fractions are read.
 * s:    float status (the parameter itself is on a signature line not
 *       shown here); default_nan_mode is read and float_flag_invalid
 *       may be raised.
 *
 * Returns a FloatPartsN pointer.  NOTE(review): the return statement and
 * the line that acts on pickNaN()'s result are not in this listing, so
 * which operand is returned cannot be confirmed from here.
 */
39 static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
/* Either operand being a signaling NaN raises Invalid. */
42 if (is_snan(a->cls) || is_snan(b->cls)) {
43 float_raise(float_flag_invalid, s);
46 if (s->default_nan_mode) {
47 parts_default_nan(a, s);
/*
 * Fraction comparison result; the sign comparison on the next line
 * presumably acts as a tie-break (its guarding condition is not shown).
 */
49 int cmp = frac_cmp(a, b);
51 cmp = a->sign < b->sign;
/* pickNaN() receives both classes plus whether cmp is positive. */
54 if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
/* Whatever a refers to at this point is silenced if it is signaling. */
57 if (is_snan(a->cls)) {
58 parts_silence_nan(a, s);
/*
 * Choose the NaN result for a three-operand multiply-add.
 *
 * a, b, c:  operand parts; their classes are passed to pickNaNMulAdd().
 * s:        float status; default_nan_mode is read and
 *           float_flag_invalid may be raised.
 * ab_mask:  class mask of a and b, compared with float_cmask_infzero.
 * abc_mask: class mask of all three, tested for float_cmask_snan.
 *
 * Returns a FloatPartsN pointer.  NOTE(review): the lines that map
 * `which` to an operand and the return statements are not in this
 * listing -- confirm the selection against the complete source.
 */
64 static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65 FloatPartsN *c, float_status *s,
66 int ab_mask, int abc_mask)
/* Any signaling NaN among the operands raises Invalid. */
70 if (unlikely(abc_mask & float_cmask_snan)) {
71 float_raise(float_flag_invalid, s);
/* The fourth argument tells the picker whether a*b is Inf * 0. */
74 which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75 ab_mask == float_cmask_infzero, s);
/* Default-NaN mode, or a picker result of 3, yields the default NaN. */
77 if (s->default_nan_mode || which == 3) {
79 * Note that this check is after pickNaNMulAdd so that function
80 * has an opportunity to set the Invalid flag for infzero.
82 parts_default_nan(a, s);
96 g_assert_not_reached();
/* Silence the selected NaN if it is signaling. */
98 if (is_snan(a->cls)) {
99 parts_silence_nan(a, s);
105 * Canonicalize the FloatParts structure. Determine the class,
106 * unbias the exponent, and normalize the fraction.
/*
 * Classify raw parts and convert them to canonical form in place.
 *
 * p:      parts holding the raw biased exponent and fraction; cls, exp
 *         and the fraction are rewritten.
 * status: float status; flush_inputs_to_zero is read and
 *         float_flag_input_denormal may be raised.
 * fmt:    format description (declared on a signature line not shown
 *         here); exp_bias, exp_max, frac_shift and arm_althp are read.
 */
108 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
/* Biased exponent 0: either a true zero or a denormal. */
111 if (unlikely(p->exp == 0)) {
112 if (likely(frac_eqz(p))) {
113 p->cls = float_class_zero;
/* Denormal input with flushing enabled: flag it and treat it as zero. */
114 } else if (status->flush_inputs_to_zero) {
115 float_raise(float_flag_input_denormal, status);
116 p->cls = float_class_zero;
/*
 * Presumably the remaining denormal path: normalize the fraction and
 * fold the shift count into the unbiased exponent.
 */
119 int shift = frac_normalize(p);
120 p->cls = float_class_normal;
121 p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
/*
 * Exponent below exp_max, or any nonzero exponent when arm_althp is set:
 * a normal number.  Unbias, shift the fraction up and set the implicit bit.
 */
123 } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
124 p->cls = float_class_normal;
125 p->exp -= fmt->exp_bias;
126 frac_shl(p, fmt->frac_shift);
127 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
/* Remaining exponents with a zero fraction are infinity. */
128 } else if (likely(frac_eqz(p))) {
129 p->cls = float_class_inf;
/* Nonzero fraction: a NaN; parts_is_snan_frac() picks signaling vs quiet. */
131 frac_shl(p, fmt->frac_shift);
132 p->cls = (parts_is_snan_frac(p->frac_hi, status)
133 ? float_class_snan : float_class_qnan);
138 * Round and uncanonicalize a floating-point number by parts. There
139 * are FRAC_SHIFT bits that may require rounding at the bottom of the
140 * fraction; these bits will be removed. The exponent will be biased
141 * by EXP_BIAS and must be bounded by [0, EXP_MAX-1].
/*
 * Round canonical parts and convert them back to the biased form.
 *
 * p:   parts updated in place (class, exponent, fraction).
 * s:   float status; float_rounding_mode, flush_to_zero and
 *      tininess_before_rounding are read, and the accumulated flags are
 *      passed to float_raise() in the last visible statement.
 * fmt: format description (declared on a signature line not shown
 *      here); its masks, shift, bias, exp_max and arm_althp are read.
 *
 * NOTE(review): this listing omits short lines (returns, breaks, else,
 * several assignments to inc/exp), so the control flow described in the
 * comments below is partly inferred -- confirm against the full source.
 */
143 static void partsN(uncanon)(FloatPartsN *p, float_status *s,
146 const int exp_max = fmt->exp_max;
147 const int frac_shift = fmt->frac_shift;
148 const uint64_t frac_lsb = fmt->frac_lsb;
149 const uint64_t frac_lsbm1 = fmt->frac_lsbm1;
150 const uint64_t round_mask = fmt->round_mask;
151 const uint64_t roundeven_mask = fmt->roundeven_mask;
/* Zero, infinity and NaN are encoded directly, without rounding. */
156 if (unlikely(p->cls != float_class_normal)) {
158 case float_class_zero:
/* Infinity and NaN are asserted not to occur for the arm_althp format. */
162 case float_class_inf:
163 g_assert(!fmt->arm_althp);
164 p->exp = fmt->exp_max;
167 case float_class_qnan:
168 case float_class_snan:
169 g_assert(!fmt->arm_althp);
170 p->exp = fmt->exp_max;
171 frac_shr(p, fmt->frac_shift);
176 g_assert_not_reached();
/*
 * Pick the rounding increment for the current rounding mode.
 * overflow_norm is also set here; its consumer is not in this listing
 * (presumably it selects a finite result instead of infinity on
 * overflow -- confirm).
 */
179 overflow_norm = false;
180 switch (s->float_rounding_mode) {
181 case float_round_nearest_even:
182 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
184 case float_round_ties_away:
187 case float_round_to_zero:
188 overflow_norm = true;
192 inc = p->sign ? 0 : round_mask;
193 overflow_norm = p->sign;
195 case float_round_down:
196 inc = p->sign ? round_mask : 0;
197 overflow_norm = !p->sign;
199 case float_round_to_odd:
200 overflow_norm = true;
202 case float_round_to_odd_inf:
203 inc = p->frac_lo & frac_lsb ? 0 : round_mask;
206 g_assert_not_reached();
/* Re-bias the exponent; a positive value takes the normal-result path. */
209 exp = p->exp + fmt->exp_bias;
210 if (likely(exp > 0)) {
/* Any bit under round_mask makes the result inexact. */
211 if (p->frac_lo & round_mask) {
212 flags |= float_flag_inexact;
/* A carry out of the rounding add is repaired by restoring the implicit bit. */
213 if (frac_addi(p, p, inc)) {
215 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
219 frac_shr(p, frac_shift);
221 if (fmt->arm_althp) {
222 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
223 if (unlikely(exp > exp_max)) {
224 /* Overflow. Return the maximum normal. */
225 flags = float_flag_invalid;
/* Other formats overflow once the exponent reaches exp_max. */
229 } else if (unlikely(exp >= exp_max)) {
230 flags |= float_flag_overflow | float_flag_inexact;
235 p->cls = float_class_inf;
/* Non-positive exponent with flush_to_zero: flag and produce zero. */
240 } else if (s->flush_to_zero) {
241 flags |= float_flag_output_denormal;
242 p->cls = float_class_zero;
/*
 * Denormal result.  Tiny if tininess is detected before rounding or
 * exp < 0; the frac_addi() below overrides is_tiny with "the rounding
 * add did not carry" (its guarding condition is not shown).
 */
246 bool is_tiny = s->tininess_before_rounding || exp < 0;
250 is_tiny = !frac_addi(&discard, p, inc);
/* Shift the fraction right by 1 - exp to denormalize it. */
253 frac_shrjam(p, 1 - exp);
255 if (p->frac_lo & round_mask) {
256 /* Need to recompute round-to-even/round-to-odd. */
257 switch (s->float_rounding_mode) {
258 case float_round_nearest_even:
259 inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
262 case float_round_to_odd:
263 case float_round_to_odd_inf:
264 inc = p->frac_lo & frac_lsb ? 0 : round_mask;
269 flags |= float_flag_inexact;
270 frac_addi(p, p, inc);
/* exp becomes 1 if the implicit bit is set after rounding, else 0. */
273 exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
274 frac_shr(p, frac_shift);
/* Underflow is reported only for results that are both tiny and inexact. */
276 if (is_tiny && (flags & float_flag_inexact)) {
277 flags |= float_flag_underflow;
279 if (exp == 0 && frac_eqz(p)) {
280 p->cls = float_class_zero;
284 float_raise(flags, s);
288 * Returns the result of adding or subtracting the values of the
289 * floating-point values `a' and `b'. The operation is performed
290 * according to the IEC/IEEE Standard for Binary Floating-Point
/*
 * Add b to a, or subtract b from a when `subtract` is true.
 *
 * a, b:     operand parts; a is modified in place in the visible lines.
 * s:        float status; float_rounding_mode is read and
 *           float_flag_invalid may be raised.
 * subtract: XORed into b's sign to get the effective sign of b.
 *
 * Returns a FloatPartsN pointer; the only return statement visible in
 * this listing is the NaN path via parts_pick_nan().
 * NOTE(review): short lines (goto/return/else/braces and labels) are
 * omitted here, so the branch structure is inferred -- confirm.
 */
293 static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
294 float_status *s, bool subtract)
296 bool b_sign = b->sign ^ subtract;
297 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
/* Effective signs differ: the operation is a magnitude subtraction. */
299 if (a->sign != b_sign) {
301 if (likely(ab_mask == float_cmask_normal)) {
302 if (parts_sub_normal(a, b)) {
305 /* Subtract was exact, fall through to set sign. */
306 ab_mask = float_cmask_zero;
/* A zero result takes a negative sign only when rounding down. */
309 if (ab_mask == float_cmask_zero) {
310 a->sign = s->float_rounding_mode == float_round_down;
314 if (unlikely(ab_mask & float_cmask_anynan)) {
318 if (ab_mask & float_cmask_inf) {
319 if (a->cls != float_class_inf) {
323 if (b->cls != float_class_inf) {
/*
 * Presumably reached when both operands are infinities of opposite
 * effective sign: Invalid is raised and the default NaN produced.
 */
328 float_raise(float_flag_invalid, s);
329 parts_default_nan(a, s);
/* Effective signs agree: the operation is a magnitude addition. */
334 if (likely(ab_mask == float_cmask_normal)) {
335 parts_add_normal(a, b);
339 if (ab_mask == float_cmask_zero) {
343 if (unlikely(ab_mask & float_cmask_anynan)) {
347 if (ab_mask & float_cmask_inf) {
348 a->cls = float_class_inf;
/* Remaining mixes are asserted to be one zero and one normal operand. */
353 if (b->cls == float_class_zero) {
354 g_assert(a->cls == float_class_normal);
358 g_assert(a->cls == float_class_zero);
359 g_assert(b->cls == float_class_normal);
365 return parts_pick_nan(a, b, s);
369 * Returns the result of multiplying the floating-point values `a' and
370 * `b'. The operation is performed according to the IEC/IEEE Standard
371 * for Binary Floating-Point Arithmetic.
/*
 * Multiply a by b.
 *
 * a, b: operand parts; a's class, exponent and fraction are written in
 *       the visible lines.
 * s:    float status (declared on a signature line not shown here);
 *       float_flag_invalid may be raised.
 *
 * Returns a FloatPartsN pointer; the only return statement visible in
 * this listing is the NaN path via parts_pick_nan().
 * NOTE(review): the uses of `sign` and the remaining returns are on
 * lines omitted from this listing.
 */
373 static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
376 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
377 bool sign = a->sign ^ b->sign;
/* Both operands normal: widening multiply, then narrow back into a. */
379 if (likely(ab_mask == float_cmask_normal)) {
382 frac_mulw(&tmp, a, b);
383 frac_truncjam(a, &tmp);
/*
 * The product exponent is the sum plus one; a missing implicit bit is
 * detected next (the adjustment itself is on lines not shown).
 */
385 a->exp += b->exp + 1;
386 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
395 /* Inf * Zero == NaN */
396 if (unlikely(ab_mask == float_cmask_infzero)) {
397 float_raise(float_flag_invalid, s);
398 parts_default_nan(a, s);
402 if (unlikely(ab_mask & float_cmask_anynan)) {
403 return parts_pick_nan(a, b, s);
406 /* Multiply by 0 or Inf */
407 if (ab_mask & float_cmask_inf) {
408 a->cls = float_class_inf;
/* With no infinity involved, a zero operand is asserted to be present. */
413 g_assert(ab_mask & float_cmask_zero);
414 a->cls = float_class_zero;
420 * Returns the result of multiplying the floating-point values `a' and
421 * `b' then adding 'c', with no intermediate rounding step after the
422 * multiplication. The operation is performed according to the
423 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
424 * The flags argument allows the caller to select negation of the
425 * addend, the intermediate product, or the final result. (The
426 * difference between this and having the caller do a separate
427 * negation is that negating externally will flip the sign bit on NaNs.)
429 * Requires A and C extracted into a double-sized structure to provide the
430 * extra space for the widening multiply.
/*
 * Fused multiply-add on parts: a * b combined with c, using widened
 * temporaries (p_widen, c_widen) for the product and the addend.
 *
 * a, b, c: operand parts; a receives the narrowed result in the
 *          visible lines.
 * flags:   tested against float_muladd_negate_c,
 *          float_muladd_negate_product, float_muladd_halve_result and
 *          float_muladd_negate_result.
 * s:       float status; float_rounding_mode is read and
 *          float_flag_invalid may be raised.
 *
 * Returns a FloatPartsN pointer; the only return statement visible in
 * this listing is the NaN path via parts_pick_nan_muladd().
 * NOTE(review): the bodies of the flag tests, the labels and most
 * returns/gotos are on lines omitted from this listing -- confirm the
 * control flow against the complete source.
 */
432 static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
433 FloatPartsN *c, int flags, float_status *s)
435 int ab_mask, abc_mask;
436 FloatPartsW p_widen, c_widen;
438 ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
439 abc_mask = float_cmask(c->cls) | ab_mask;
442 * It is implementation-defined whether the cases of (0,inf,qnan)
443 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
444 * they return if they do), so we have to hand this information
445 * off to the target-specific pick-a-NaN routine.
447 if (unlikely(abc_mask & float_cmask_anynan)) {
448 return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
451 if (flags & float_muladd_negate_c) {
455 /* Compute the sign of the product into A. */
457 if (flags & float_muladd_negate_product) {
/* Special-case handling when a or b is not a normal number. */
461 if (unlikely(ab_mask != float_cmask_normal)) {
462 if (unlikely(ab_mask == float_cmask_infzero)) {
466 if (ab_mask & float_cmask_inf) {
467 if (c->cls == float_class_inf && a->sign != c->sign) {
473 g_assert(ab_mask & float_cmask_zero);
474 if (c->cls == float_class_normal) {
478 if (c->cls == float_class_zero) {
479 if (a->sign != c->sign) {
480 goto return_sub_zero;
484 g_assert(c->cls == float_class_inf);
487 if (unlikely(c->cls == float_class_inf)) {
492 /* Perform the multiplication step. */
493 p_widen.sign = a->sign;
494 p_widen.exp = a->exp + b->exp + 1;
495 frac_mulw(&p_widen, a, b);
/* Without the implicit bit, the product fraction is doubled. */
496 if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
497 frac_add(&p_widen, &p_widen, &p_widen);
501 /* Perform the addition step. */
502 if (c->cls != float_class_zero) {
503 /* Zero-extend C to less significant bits. */
504 frac_widen(&c_widen, c);
505 c_widen.exp = c->exp;
/* Equal signs add magnitudes; otherwise subtract, and a zero return
 * from parts_sub_normal() jumps to return_sub_zero. */
507 if (a->sign == c->sign) {
508 parts_add_normal(&p_widen, &c_widen);
509 } else if (!parts_sub_normal(&p_widen, &c_widen)) {
510 goto return_sub_zero;
514 /* Narrow with sticky bit, for proper rounding later. */
515 frac_truncjam(a, &p_widen);
516 a->sign = p_widen.sign;
517 a->exp = p_widen.exp;
520 if (flags & float_muladd_halve_result) {
524 if (flags & float_muladd_negate_result) {
/* The sign set here is negative only when rounding down. */
530 a->sign = s->float_rounding_mode == float_round_down;
532 a->cls = float_class_zero;
536 a->cls = float_class_inf;
540 float_raise(float_flag_invalid, s);
541 parts_default_nan(a, s);
546 * Returns the result of dividing the floating-point value `a' by the
547 * corresponding value `b'. The operation is performed according to
548 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
/*
 * Divide a by b.
 *
 * a, b: operand parts; a's class and exponent are written in the
 *       visible lines.
 * s:    float status (declared on a signature line not shown here);
 *       float_flag_invalid and float_flag_divbyzero may be raised.
 *
 * Returns a FloatPartsN pointer; the only return statement visible in
 * this listing is the NaN path via parts_pick_nan().
 * NOTE(review): the uses of `sign`, the gotos/labels and the remaining
 * returns are on lines omitted from this listing.
 */
550 static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
553 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
554 bool sign = a->sign ^ b->sign;
/*
 * Both operands normal: the exponent drops by b's exponent plus the
 * value returned by frac_div().
 */
556 if (likely(ab_mask == float_cmask_normal)) {
558 a->exp -= b->exp + frac_div(a, b);
562 /* 0/0 or Inf/Inf => NaN */
563 if (unlikely(ab_mask == float_cmask_zero) ||
564 unlikely(ab_mask == float_cmask_inf)) {
565 float_raise(float_flag_invalid, s);
566 parts_default_nan(a, s);
570 /* All the NaN cases */
571 if (unlikely(ab_mask & float_cmask_anynan)) {
572 return parts_pick_nan(a, b, s);
/* Infinite or zero dividend (the branch bodies are not shown). */
578 if (a->cls == float_class_inf) {
583 if (a->cls == float_class_zero) {
/* Dividing by infinity gives zero. */
588 if (b->cls == float_class_inf) {
589 a->cls = float_class_zero;
/* Otherwise b is asserted to be zero: raise divide-by-zero, give infinity. */
594 g_assert(b->cls == float_class_zero);
595 float_raise(float_flag_divbyzero, s);
596 a->cls = float_class_inf;
601 * Rounds the floating-point value `a' to an integer, and returns the
602 * result as a floating-point value. The operation is performed
603 * according to the IEC/IEEE Standard for Binary Floating-Point
606 * parts_round_to_int_normal is an internal helper function for
607 * normal numbers only, returning true for inexact but not directly
608 * raising float_flag_inexact.
/*
 * Round a normal number in *a to an integral value in place.
 *
 * a:         parts to round; fraction, exponent and possibly class are
 *            rewritten.
 * rmode:     rounding mode; the visible case labels cover nearest_even,
 *            ties_away, to_zero, down and to_odd.
 * scale:     clamped to [-0x10000, 0x10000] before use.
 * frac_size: compared with a->exp to detect an already-integral value.
 *
 * Returns bool.  NOTE(review): the return statements, the switch
 * headers and several assignments (e.g. to shift_adj, `one`, and some
 * `inc` values) are on lines omitted from this listing -- confirm the
 * details against the complete source.
 */
610 static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
611 int scale, int frac_size)
613 uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
/* Clamp the scale to a fixed range. */
616 scale = MIN(MAX(scale, -0x10000), 0x10000);
624 case float_round_nearest_even:
628 /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
629 frac_add(&tmp, a, a);
630 /* Anything remaining means frac > 0.5. */
631 one = !frac_eqz(&tmp);
634 case float_round_ties_away:
637 case float_round_to_zero:
643 case float_round_down:
646 case float_round_to_odd:
650 g_assert_not_reached();
/* The all-fractional outcome is either exactly the implicit bit or zero. */
656 a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
658 a->cls = float_class_zero;
/* Exponent at or beyond frac_size: nothing fractional is left to round. */
663 if (a->exp >= frac_size) {
668 if (N > 64 && a->exp < N - 64) {
670 * Rounding is not in the low word -- shift lsb to bit 2,
671 * which leaves room for sticky and rounding bit.
673 shift_adj = (N - 1) - (a->exp + 2);
674 frac_shrjam(a, shift_adj);
/* Otherwise the integer lsb is located directly from the exponent. */
678 frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
/* Derive the half-lsb bit and the rounding masks from frac_lsb. */
681 frac_lsbm1 = frac_lsb >> 1;
682 rnd_mask = frac_lsb - 1;
683 rnd_even_mask = rnd_mask | frac_lsb;
685 if (!(a->frac_lo & rnd_mask)) {
686 /* Fractional bits already clear, undo the shift above. */
687 frac_shl(a, shift_adj);
/* Choose the rounding increment for rmode. */
692 case float_round_nearest_even:
693 inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
695 case float_round_ties_away:
698 case float_round_to_zero:
702 inc = a->sign ? 0 : rnd_mask;
704 case float_round_down:
705 inc = a->sign ? rnd_mask : 0;
707 case float_round_to_odd:
708 inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
711 g_assert_not_reached();
/* No pre-shift: add in place; a carry out restores the implicit bit. */
714 if (shift_adj == 0) {
715 if (frac_addi(a, a, inc)) {
717 a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
720 a->frac_lo &= ~rnd_mask;
/* Pre-shifted: add, clear the fractional bits, then shift back up. */
722 frac_addi(a, a, inc);
723 a->frac_lo &= ~rnd_mask;
724 /* Be careful shifting back, not to overflow */
725 frac_shl(a, shift_adj - 1);
726 if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
/*
 * Round *a to an integral value, dispatching on its class.
 *
 * a:     parts updated in place.
 * rmode: rounding mode forwarded to parts_round_to_int_normal().
 * scale: scale forwarded to parts_round_to_int_normal().
 * s:     float status; float_flag_inexact may be raised, and it is
 *        passed to parts_return_nan() for NaN inputs.
 * fmt:   format description (declared on a signature line not shown
 *        here); frac_size is read.
 *
 * NOTE(review): the switch header and break lines are omitted from this
 * listing; the per-case description below assumes the usual layout.
 */
735 static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
736 int scale, float_status *s,
/* NaN inputs are handed to parts_return_nan(). */
740 case float_class_qnan:
741 case float_class_snan:
742 parts_return_nan(a, s);
/* No statement is visible for zero and infinity. */
744 case float_class_zero:
745 case float_class_inf:
/* Normal numbers are rounded; a true result raises Inexact. */
747 case float_class_normal:
748 if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
749 float_raise(float_flag_inexact, s);
753 g_assert_not_reached();
758 * Returns the result of converting the floating-point value `a' to
759 * the two's complement integer format. The conversion is performed
760 * according to the IEC/IEEE Standard for Binary Floating-Point
761 * Arithmetic---which means in particular that the conversion is
762 * rounded according to the current rounding mode. If `a' is a NaN,
763 * the largest positive integer is returned. Otherwise, if the
764 * conversion overflows, the largest integer with the same sign as `a'
/*
 * Convert the value in *p to a signed integer bounded by min and max.
 *
 * p:     parts to convert; rounded in place by
 *        parts_round_to_int_normal() for normal inputs.
 * rmode: rounding mode forwarded to the rounding helper.
 * scale: scale forwarded to the rounding helper.
 * min:   lower bound of the target range.
 * max:   upper bound of the target range.
 * s:     float status (declared on a signature line not shown here);
 *        receives the accumulated flags via float_raise().
 *
 * Returns int64_t.  NOTE(review): the return statement, several
 * assignments to r and the end of the function are not in this listing
 * (it may continue past the visible text) -- confirm against the
 * complete source before relying on this description.
 */
767 static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
768 int scale, int64_t min, int64_t max,
/* NaN inputs set the Invalid flag. */
775 case float_class_snan:
776 case float_class_qnan:
777 flags = float_flag_invalid;
/* Infinity is Invalid and saturates to min or max according to sign. */
781 case float_class_inf:
782 flags = float_flag_invalid;
783 r = p->sign ? min : max;
786 case float_class_zero:
789 case float_class_normal:
790 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
791 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
792 flags = float_flag_inexact;
/* Exponent within the top fraction word: shift the integer part down. */
795 if (p->exp <= DECOMPOSED_BINARY_POINT) {
796 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
/*
 * Range checks: the magnitude is compared with -(uint64_t)min here
 * (presumably the negative-value path) and with max below; the Invalid
 * assignments presumably mark the out-of-range outcomes.
 */
801 if (r <= -(uint64_t)min) {
804 flags = float_flag_invalid;
807 } else if (r > max) {
808 flags = float_flag_invalid;
814 g_assert_not_reached();
817 float_raise(flags, s);