fpu/softfloat-parts.c.inc

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 static void partsN(return_nan)(FloatPartsN *a, float_status *s)
  19 {
  20     switch (a->cls) {
  21     case float_class_snan:
  22         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  23         if (s->default_nan_mode) {
  24             parts_default_nan(a, s);
  25         } else {
  26             parts_silence_nan(a, s);
  27         }
  28         break;
  29     case float_class_qnan:
  30         if (s->default_nan_mode) {
  31             parts_default_nan(a, s);
  32         }
  33         break;
  34     default:
  35         g_assert_not_reached();
  36     }
  37 }
  38
  39 static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
  40                                      float_status *s)
  41 {
  42     if (is_snan(a->cls) || is_snan(b->cls)) {
  43         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  44     }
  45
  46     if (s->default_nan_mode) {
  47         parts_default_nan(a, s);
  48     } else {
  49         int cmp = frac_cmp(a, b);
  50         if (cmp == 0) {
  51             cmp = a->sign < b->sign;
  52         }
  53
  54         if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
  55             a = b;
  56         }
  57         if (is_snan(a->cls)) {
  58             parts_silence_nan(a, s);
  59         }
  60     }
  61     return a;
  62 }
  63
  64 static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
  65                                             FloatPartsN *c, float_status *s,
  66                                             int ab_mask, int abc_mask)
  67 {
  68     int which;
  69
  70     if (unlikely(abc_mask & float_cmask_snan)) {
  71         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  72     }
  73
  74     which = pickNaNMulAdd(a->cls, b->cls, c->cls,
  75                           ab_mask == float_cmask_infzero, s);
  76
  77     if (s->default_nan_mode || which == 3) {
  78         /*
  79          * Note that this check is after pickNaNMulAdd so that function
  80          * has an opportunity to set the Invalid flag for infzero.
  81          */
  82         parts_default_nan(a, s);
  83         return a;
  84     }
  85
  86     switch (which) {
  87     case 0:
  88         break;
  89     case 1:
  90         a = b;
  91         break;
  92     case 2:
  93         a = c;
  94         break;
  95     default:
  96         g_assert_not_reached();
  97     }
  98     if (is_snan(a->cls)) {
  99         parts_silence_nan(a, s);
 100     }
 101     return a;
 102 }
 103
 104 /*
 105  * Canonicalize the FloatParts structure.  Determine the class,
 106  * unbias the exponent, and normalize the fraction.
 107  */
 108 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
 109                                  const FloatFmt *fmt)
 110 {
 111     if (unlikely(p->exp == 0)) {
 112         if (likely(frac_eqz(p))) {
 113             p->cls = float_class_zero;
 114         } else if (status->flush_inputs_to_zero) {
 115             float_raise(float_flag_input_denormal, status);
 116             p->cls = float_class_zero;
 117             frac_clear(p);
 118         } else {
 119             int shift = frac_normalize(p);
 120             p->cls = float_class_normal;
 121             p->exp = fmt->frac_shift - fmt->exp_bias
 122                    - shift + !fmt->m68k_denormal;
 123         }
 124     } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
 125         p->cls = float_class_normal;
 126         p->exp -= fmt->exp_bias;
 127         frac_shl(p, fmt->frac_shift);
 128         p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
 129     } else if (likely(frac_eqz(p))) {
 130         p->cls = float_class_inf;
 131     } else {
 132         frac_shl(p, fmt->frac_shift);
 133         p->cls = (parts_is_snan_frac(p->frac_hi, status)
 134                   ? float_class_snan : float_class_qnan);
 135     }
 136 }
 137
/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent is re-biased by
 * EXP_BIAS; a biased exponent outside the open interval (0, EXP_MAX)
 * is handled below as overflow or as underflow/denormal (for
 * arm_althp formats EXP_MAX itself is still accepted).
 *
 * P is updated in place.  Exception flags are accumulated in a local
 * and raised once, at the end, through float_raise().
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    /* Mask of the low fraction bits that are rounded away. */
    const uint64_t round_mask = fmt->round_mask;
    /* Lowest bit that is kept; wraps to 0 if round_mask is all-ones. */
    const uint64_t frac_lsb = round_mask + 1;
    /* Highest bit of round_mask, i.e. half of frac_lsb. */
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    /* When set, overflow yields the largest normal rather than Inf. */
    bool overflow_norm = false;
    int exp, flags = 0;

    /* Select the rounding increment for the current rounding mode. */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            /* The kept lsb lives in bit 0 of frac_hi. */
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Positive biased exponent: round as a normal number. */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Carry out of the fraction: renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                /* Note: this replaces, rather than adds to, the flags. */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* Saturate to the largest finite value. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        /* Underflow with exponent re-biasing: round as for a normal. */
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Denormal result. */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /*
             * Tininess detected after rounding with exp == 0: the value
             * is tiny unless adding the increment carries out.
             */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Shift right into denormal position, keeping a sticky bit. */
        frac_shrjam(p, !fmt->m68k_denormal - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                /* The other modes keep the increment chosen above. */
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Exponent field is 1 if rounding reached the implicit bit. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
        frac_shr(p, frac_shift);

        /* Underflow is only signaled for tiny results that are inexact. */
        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
 304
 305 static void partsN(uncanon)(FloatPartsN *p, float_status *s,
 306                             const FloatFmt *fmt)
 307 {
 308     if (likely(p->cls == float_class_normal)) {
 309         parts_uncanon_normal(p, s, fmt);
 310     } else {
 311         switch (p->cls) {
 312         case float_class_zero:
 313             p->exp = 0;
 314             frac_clear(p);
 315             return;
 316         case float_class_inf:
 317             g_assert(!fmt->arm_althp);
 318             p->exp = fmt->exp_max;
 319             frac_clear(p);
 320             return;
 321         case float_class_qnan:
 322         case float_class_snan:
 323             g_assert(!fmt->arm_althp);
 324             p->exp = fmt->exp_max;
 325             frac_shr(p, fmt->frac_shift);
 326             return;
 327         default:
 328             break;
 329         }
 330         g_assert_not_reached();
 331     }
 332 }
 333
 334 /*
 335  * Returns the result of adding or subtracting the values of the
 336  * floating-point values `a' and `b'. The operation is performed
 337  * according to the IEC/IEEE Standard for Binary Floating-Point
 338  * Arithmetic.
 339  */
 340 static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
 341                                    float_status *s, bool subtract)
 342 {
 343     bool b_sign = b->sign ^ subtract;
 344     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 345
 346     if (a->sign != b_sign) {
 347         /* Subtraction */
 348         if (likely(ab_mask == float_cmask_normal)) {
 349             if (parts_sub_normal(a, b)) {
 350                 return a;
 351             }
 352             /* Subtract was exact, fall through to set sign. */
 353             ab_mask = float_cmask_zero;
 354         }
 355
 356         if (ab_mask == float_cmask_zero) {
 357             a->sign = s->float_rounding_mode == float_round_down;
 358             return a;
 359         }
 360
 361         if (unlikely(ab_mask & float_cmask_anynan)) {
 362             goto p_nan;
 363         }
 364
 365         if (ab_mask & float_cmask_inf) {
 366             if (a->cls != float_class_inf) {
 367                 /* N - Inf */
 368                 goto return_b;
 369             }
 370             if (b->cls != float_class_inf) {
 371                 /* Inf - N */
 372                 return a;
 373             }
 374             /* Inf - Inf */
 375             float_raise(float_flag_invalid | float_flag_invalid_isi, s);
 376             parts_default_nan(a, s);
 377             return a;
 378         }
 379     } else {
 380         /* Addition */
 381         if (likely(ab_mask == float_cmask_normal)) {
 382             parts_add_normal(a, b);
 383             return a;
 384         }
 385
 386         if (ab_mask == float_cmask_zero) {
 387             return a;
 388         }
 389
 390         if (unlikely(ab_mask & float_cmask_anynan)) {
 391             goto p_nan;
 392         }
 393
 394         if (ab_mask & float_cmask_inf) {
 395             a->cls = float_class_inf;
 396             return a;
 397         }
 398     }
 399
 400     if (b->cls == float_class_zero) {
 401         g_assert(a->cls == float_class_normal);
 402         return a;
 403     }
 404
 405     g_assert(a->cls == float_class_zero);
 406     g_assert(b->cls == float_class_normal);
 407  return_b:
 408     b->sign = b_sign;
 409     return b;
 410
 411  p_nan:
 412     return parts_pick_nan(a, b, s);
 413 }
 414
 415 /*
 416  * Returns the result of multiplying the floating-point values `a' and
 417  * `b'. The operation is performed according to the IEC/IEEE Standard
 418  * for Binary Floating-Point Arithmetic.
 419  */
 420 static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
 421                                 float_status *s)
 422 {
 423     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 424     bool sign = a->sign ^ b->sign;
 425
 426     if (likely(ab_mask == float_cmask_normal)) {
 427         FloatPartsW tmp;
 428
 429         frac_mulw(&tmp, a, b);
 430         frac_truncjam(a, &tmp);
 431
 432         a->exp += b->exp + 1;
 433         if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
 434             frac_add(a, a, a);
 435             a->exp -= 1;
 436         }
 437
 438         a->sign = sign;
 439         return a;
 440     }
 441
 442     /* Inf * Zero == NaN */
 443     if (unlikely(ab_mask == float_cmask_infzero)) {
 444         float_raise(float_flag_invalid | float_flag_invalid_imz, s);
 445         parts_default_nan(a, s);
 446         return a;
 447     }
 448
 449     if (unlikely(ab_mask & float_cmask_anynan)) {
 450         return parts_pick_nan(a, b, s);
 451     }
 452
 453     /* Multiply by 0 or Inf */
 454     if (ab_mask & float_cmask_inf) {
 455         a->cls = float_class_inf;
 456         a->sign = sign;
 457         return a;
 458     }
 459
 460     g_assert(ab_mask & float_cmask_zero);
 461     a->cls = float_class_zero;
 462     a->sign = sign;
 463     return a;
 464 }
 465
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 *
 * The flags tested here are float_muladd_negate_c,
 * float_muladd_negate_product, float_muladd_halve_result and
 * float_muladd_negate_result.  The result is written into A, which is
 * returned, except when an operand is a NaN: then whatever
 * parts_pick_nan_muladd() selects is returned.  Note that C's sign may
 * be modified in place.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    /* From here on no operand is a NaN, so the signs can be adjusted. */
    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    /* Special cases where A or B is zero or infinite. */
    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* Inf * 0 */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            /* Infinite product: invalid only if C is the opposite Inf. */
            if (c->cls == float_class_inf && a->sign != c->sign) {
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        /* Zero product: the result is determined by C alone. */
        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    /* Finite (or zero) product plus an infinite addend gives that Inf. */
    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Top bit clear: shift left by one and compensate in exp. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            /* Exact cancellation. */
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* Zero from operands of opposite sign: sign follows rounding mode. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    /* The default NaN is returned without applying negate_result. */
    parts_default_nan(a, s);
    return a;
}
 592
 593 /*
 594  * Returns the result of dividing the floating-point value `a' by the
 595  * corresponding value `b'. The operation is performed according to
 596  * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 597  */
 598 static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
 599                                 float_status *s)
 600 {
 601     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 602     bool sign = a->sign ^ b->sign;
 603
 604     if (likely(ab_mask == float_cmask_normal)) {
 605         a->sign = sign;
 606         a->exp -= b->exp + frac_div(a, b);
 607         return a;
 608     }
 609
 610     /* 0/0 or Inf/Inf => NaN */
 611     if (unlikely(ab_mask == float_cmask_zero)) {
 612         float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
 613         goto d_nan;
 614     }
 615     if (unlikely(ab_mask == float_cmask_inf)) {
 616         float_raise(float_flag_invalid | float_flag_invalid_idi, s);
 617         goto d_nan;
 618     }
 619
 620     /* All the NaN cases */
 621     if (unlikely(ab_mask & float_cmask_anynan)) {
 622         return parts_pick_nan(a, b, s);
 623     }
 624
 625     a->sign = sign;
 626
 627     /* Inf / X */
 628     if (a->cls == float_class_inf) {
 629         return a;
 630     }
 631
 632     /* 0 / X */
 633     if (a->cls == float_class_zero) {
 634         return a;
 635     }
 636
 637     /* X / Inf */
 638     if (b->cls == float_class_inf) {
 639         a->cls = float_class_zero;
 640         return a;
 641     }
 642
 643     /* X / 0 => Inf */
 644     g_assert(b->cls == float_class_zero);
 645     float_raise(float_flag_divbyzero, s);
 646     a->cls = float_class_inf;
 647     return a;
 648
 649  d_nan:
 650     parts_default_nan(a, s);
 651     return a;
 652 }
 653
 654 /*
 655  * Floating point remainder, per IEC/IEEE, or modulus.
 656  */
 657 static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
 658                                    uint64_t *mod_quot, float_status *s)
 659 {
 660     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 661
 662     if (likely(ab_mask == float_cmask_normal)) {
 663         frac_modrem(a, b, mod_quot);
 664         return a;
 665     }
 666
 667     if (mod_quot) {
 668         *mod_quot = 0;
 669     }
 670
 671     /* All the NaN cases */
 672     if (unlikely(ab_mask & float_cmask_anynan)) {
 673         return parts_pick_nan(a, b, s);
 674     }
 675
 676     /* Inf % N; N % 0 */
 677     if (a->cls == float_class_inf || b->cls == float_class_zero) {
 678         float_raise(float_flag_invalid, s);
 679         parts_default_nan(a, s);
 680         return a;
 681     }
 682
 683     /* N % Inf; 0 % N */
 684     g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
 685     return a;
 686 }
 687
/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 *
 * The operand is taken from A and the result is written back into A.
 * NaN operands go through parts_return_nan().  Zero (of either sign)
 * and positive infinity are returned unchanged.  Negative infinity and
 * negative normal inputs raise Invalid and yield the default NaN.
 * FMT is consulted only for frac_size, to select the float32-or-smaller
 * path when N == 64.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    /* The constant 3 in the fixed-point formats used below. */
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    /* Dispose of NaN, zero and infinity first. */
    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    /* Square root of a negative normal number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    /* Table index: 6 fraction bits plus the inverted exponent parity. */
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    /* N != 64: continue iterating in 64-bit and then 128-bit arithmetic. */
    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    /* Double the 128-bit r by adding it to itself. */
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Top bit clear: shift the fraction left by one. */
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    /* Invalid operand: raise the sqrt-specific Invalid flag. */
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}
 893
/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 *
 * @a: value to round; updated in place.  a->cls is changed to
 *     float_class_zero when the value rounds to zero.
 * @rmode: rounding mode to apply.
 * @scale: added to a->exp before rounding, after being clamped to
 *         [-0x10000, 0x10000].
 * @frac_size: a scaled exponent at or above this value is treated as
 *             already integral and left untouched.
 *
 * Returns true when fraction bits had to be rounded away, false when
 * the scaled value was already an integer.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale so that adding it to the exponent stays bounded. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        /* Set when the magnitude rounds to 1 rather than to 0. */
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            /* Only exp == -1 can round up; a tie of 0.5 stays at 0. */
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            /* With exp == -1 the magnitude is at least 0.5. */
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            /* Positive values round up to 1, negative ones to 0. */
            one = !a->sign;
            break;
        case float_round_down:
            /* Negative values round away from zero, positive ones to 0. */
            one = a->sign;
            break;
        case float_round_to_odd:
            /* The input is inexact here, and 1 is the odd candidate. */
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        /* Replace the value with exactly 1 (exp 0, implicit bit only) or 0. */
        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        /* The integer lsb already lives in frac_lo; no pre-shift needed. */
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    /*
     * frac_lsb is the fraction bit with integer weight 1, frac_lsbm1 the
     * bit worth one half, rnd_mask every bit below the lsb, and
     * rnd_even_mask those bits plus the lsb itself.
     */
    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    /* Choose the increment that, once the low bits are masked, rounds. */
    switch (rmode) {
    case float_round_nearest_even:
        /* Add one half, except on an exact tie whose lsb is already even. */
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        /* Any non-zero fraction carries into the lsb for positive values. */
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        /* Any non-zero fraction carries into the lsb for negative values. */
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        /* Leave an odd lsb alone; otherwise bump to the next integer. */
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        /*
         * A true result from frac_addi is handled as a carry out of the
         * top bit: shift right one, restore the implicit bit and bump
         * the exponent.
         */
        if (frac_addi(a, a, inc)) {
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        /* Discard the fraction bits now that rounding has been applied. */
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            /* Rounding carried into a new top bit: one more exponent step. */
            a->exp++;
        } else {
            /* No carry: perform the final one-bit left shift as a + a. */
            frac_add(a, a, a);
        }
    }
    return true;
}
1028
1029 static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1030                                  int scale, float_status *s,
1031                                  const FloatFmt *fmt)
1032 {
1033     switch (a->cls) {
1034     case float_class_qnan:
1035     case float_class_snan:
1036         parts_return_nan(a, s);
1037         break;
1038     case float_class_zero:
1039     case float_class_inf:
1040         break;
1041     case float_class_normal:
1042         if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1043             float_raise(float_flag_inexact, s);
1044         }
1045         break;
1046     default:
1047         g_assert_not_reached();
1048     }
1049 }
1050
1051 /*
1052  * Returns the result of converting the floating-point value `a' to
1053  * the two's complement integer format. The conversion is performed
1054  * according to the IEC/IEEE Standard for Binary Floating-Point
1055  * Arithmetic---which means in particular that the conversion is
1056  * rounded according to the current rounding mode. If `a' is a NaN,
1057  * the largest positive integer is returned. Otherwise, if the
1058  * conversion overflows, the largest integer with the same sign as `a'
1059  * is returned.
1060  */
1061 static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1062                                      int scale, int64_t min, int64_t max,
1063                                      float_status *s)
1064 {
1065     int flags = 0;
1066     uint64_t r;
1067
1068     switch (p->cls) {
1069     case float_class_snan:
1070         flags |= float_flag_invalid_snan;
1071         /* fall through */
1072     case float_class_qnan:
1073         flags |= float_flag_invalid;
1074         r = max;
1075         break;
1076
1077     case float_class_inf:
1078         flags = float_flag_invalid | float_flag_invalid_cvti;
1079         r = p->sign ? min : max;
1080         break;
1081
1082     case float_class_zero:
1083         return 0;
1084
1085     case float_class_normal:
1086         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1087         if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1088             flags = float_flag_inexact;
1089         }
1090
1091         if (p->exp <= DECOMPOSED_BINARY_POINT) {
1092             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1093         } else {
1094             r = UINT64_MAX;
1095         }
1096         if (p->sign) {
1097             if (r <= -(uint64_t)min) {
1098                 r = -r;
1099             } else {
1100                 flags = float_flag_invalid | float_flag_invalid_cvti;
1101                 r = min;
1102             }
1103         } else if (r > max) {
1104             flags = float_flag_invalid | float_flag_invalid_cvti;
1105             r = max;
1106         }
1107         break;
1108
1109     default:
1110         g_assert_not_reached();
1111     }
1112
1113     float_raise(flags, s);
1114     return r;
1115 }
1116
1117 /*
1118  *  Returns the result of converting the floating-point value `a' to
1119  *  the unsigned integer format. The conversion is performed according
1120  *  to the IEC/IEEE Standard for Binary Floating-Point
1121  *  Arithmetic---which means in particular that the conversion is
1122  *  rounded according to the current rounding mode. If `a' is a NaN,
1123  *  the largest unsigned integer is returned. Otherwise, if the
1124  *  conversion overflows, the largest unsigned integer is returned. If
1125  *  the 'a' is negative, the result is rounded and zero is returned;
1126  *  values that do not round to zero will raise the inexact exception
1127  *  flag.
1128  */
1129 static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1130                                       int scale, uint64_t max, float_status *s)
1131 {
1132     int flags = 0;
1133     uint64_t r;
1134
1135     switch (p->cls) {
1136     case float_class_snan:
1137         flags |= float_flag_invalid_snan;
1138         /* fall through */
1139     case float_class_qnan:
1140         flags |= float_flag_invalid;
1141         r = max;
1142         break;
1143
1144     case float_class_inf:
1145         flags = float_flag_invalid | float_flag_invalid_cvti;
1146         r = p->sign ? 0 : max;
1147         break;
1148
1149     case float_class_zero:
1150         return 0;
1151
1152     case float_class_normal:
1153         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1154         if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1155             flags = float_flag_inexact;
1156             if (p->cls == float_class_zero) {
1157                 r = 0;
1158                 break;
1159             }
1160         }
1161
1162         if (p->sign) {
1163             flags = float_flag_invalid | float_flag_invalid_cvti;
1164             r = 0;
1165         } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1166             flags = float_flag_invalid | float_flag_invalid_cvti;
1167             r = max;
1168         } else {
1169             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1170             if (r > max) {
1171                 flags = float_flag_invalid | float_flag_invalid_cvti;
1172                 r = max;
1173             }
1174         }
1175         break;
1176
1177     default:
1178         g_assert_not_reached();
1179     }
1180
1181     float_raise(flags, s);
1182     return r;
1183 }
1184
1185 /*
1186  * Like partsN(float_to_sint), except do not saturate the result.
1187  * Instead, return the rounded unbounded precision two's compliment result,
1188  * modulo 2**(bitsm1 + 1).
1189  */
1190 static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
1191                                             FloatRoundMode rmode,
1192                                             int bitsm1, float_status *s)
1193 {
1194     int flags = 0;
1195     uint64_t r;
1196     bool overflow = false;
1197
1198     switch (p->cls) {
1199     case float_class_snan:
1200         flags |= float_flag_invalid_snan;
1201         /* fall through */
1202     case float_class_qnan:
1203         flags |= float_flag_invalid;
1204         r = 0;
1205         break;
1206
1207     case float_class_inf:
1208         overflow = true;
1209         r = 0;
1210         break;
1211
1212     case float_class_zero:
1213         return 0;
1214
1215     case float_class_normal:
1216         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1217         if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
1218             flags = float_flag_inexact;
1219         }
1220
1221         if (p->exp <= DECOMPOSED_BINARY_POINT) {
1222             /*
1223              * Because we rounded to integral, and exp < 64,
1224              * we know frac_low is zero.
1225              */
1226             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1227             if (p->exp < bitsm1) {
1228                 /* Result in range. */
1229             } else if (p->exp == bitsm1) {
1230                 /* The only in-range value is INT_MIN. */
1231                 overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
1232             } else {
1233                 overflow = true;
1234             }
1235         } else {
1236             /* Overflow, but there might still be bits to return. */
1237             int shl = p->exp - DECOMPOSED_BINARY_POINT;
1238             if (shl < N) {
1239                 frac_shl(p, shl);
1240                 r = p->frac_hi;
1241             } else {
1242                 r = 0;
1243             }
1244             overflow = true;
1245         }
1246
1247         if (p->sign) {
1248             r = -r;
1249         }
1250         break;
1251
1252     default:
1253         g_assert_not_reached();
1254     }
1255
1256     if (overflow) {
1257         flags = float_flag_invalid | float_flag_invalid_cvti;
1258     }
1259     float_raise(flags, s);
1260     return r;
1261 }
1262
1263 /*
1264  * Integer to float conversions
1265  *
1266  * Returns the result of converting the two's complement integer `a'
1267  * to the floating-point format. The conversion is performed according
1268  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1269  */
1270 static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1271                                   int scale, float_status *s)
1272 {
1273     uint64_t f = a;
1274     int shift;
1275
1276     memset(p, 0, sizeof(*p));
1277
1278     if (a == 0) {
1279         p->cls = float_class_zero;
1280         return;
1281     }
1282
1283     p->cls = float_class_normal;
1284     if (a < 0) {
1285         f = -f;
1286         p->sign = true;
1287     }
1288     shift = clz64(f);
1289     scale = MIN(MAX(scale, -0x10000), 0x10000);
1290
1291     p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1292     p->frac_hi = f << shift;
1293 }
1294
1295 /*
1296  * Unsigned Integer to float conversions
1297  *
1298  * Returns the result of converting the unsigned integer `a' to the
1299  * floating-point format. The conversion is performed according to the
1300  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1301  */
1302 static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1303                                   int scale, float_status *status)
1304 {
1305     memset(p, 0, sizeof(*p));
1306
1307     if (a == 0) {
1308         p->cls = float_class_zero;
1309     } else {
1310         int shift = clz64(a);
1311         scale = MIN(MAX(scale, -0x10000), 0x10000);
1312         p->cls = float_class_normal;
1313         p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1314         p->frac_hi = a << shift;
1315     }
1316 }
1317
1318 /*
1319  * Float min/max.
1320  */
1321 static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1322                                    float_status *s, int flags)
1323 {
1324     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1325     int a_exp, b_exp, cmp;
1326
1327     if (unlikely(ab_mask & float_cmask_anynan)) {
1328         /*
1329          * For minNum/maxNum (IEEE 754-2008)
1330          * or minimumNumber/maximumNumber (IEEE 754-2019),
1331          * if one operand is a QNaN, and the other
1332          * operand is numerical, then return numerical argument.
1333          */
1334         if ((flags & (minmax_isnum | minmax_isnumber))
1335             && !(ab_mask & float_cmask_snan)
1336             && (ab_mask & ~float_cmask_qnan)) {
1337             return is_nan(a->cls) ? b : a;
1338         }
1339
1340         /*
1341          * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1342          * are removed and replaced with minimum, minimumNumber, maximum
1343          * and maximumNumber.
1344          * minimumNumber/maximumNumber behavior for SNaN is changed to:
1345          *   If both operands are NaNs, a QNaN is returned.
1346          *   If either operand is a SNaN,
1347          *   an invalid operation exception is signaled,
1348          *   but unless both operands are NaNs,
1349          *   the SNaN is otherwise ignored and not converted to a QNaN.
1350          */
1351         if ((flags & minmax_isnumber)
1352             && (ab_mask & float_cmask_snan)
1353             && (ab_mask & ~float_cmask_anynan)) {
1354             float_raise(float_flag_invalid, s);
1355             return is_nan(a->cls) ? b : a;
1356         }
1357
1358         return parts_pick_nan(a, b, s);
1359     }
1360
1361     a_exp = a->exp;
1362     b_exp = b->exp;
1363
1364     if (unlikely(ab_mask != float_cmask_normal)) {
1365         switch (a->cls) {
1366         case float_class_normal:
1367             break;
1368         case float_class_inf:
1369             a_exp = INT16_MAX;
1370             break;
1371         case float_class_zero:
1372             a_exp = INT16_MIN;
1373             break;
1374         default:
1375             g_assert_not_reached();
1376             break;
1377         }
1378         switch (b->cls) {
1379         case float_class_normal:
1380             break;
1381         case float_class_inf:
1382             b_exp = INT16_MAX;
1383             break;
1384         case float_class_zero:
1385             b_exp = INT16_MIN;
1386             break;
1387         default:
1388             g_assert_not_reached();
1389             break;
1390         }
1391     }
1392
1393     /* Compare magnitudes. */
1394     cmp = a_exp - b_exp;
1395     if (cmp == 0) {
1396         cmp = frac_cmp(a, b);
1397     }
1398
1399     /*
1400      * Take the sign into account.
1401      * For ismag, only do this if the magnitudes are equal.
1402      */
1403     if (!(flags & minmax_ismag) || cmp == 0) {
1404         if (a->sign != b->sign) {
1405             /* For differing signs, the negative operand is less. */
1406             cmp = a->sign ? -1 : 1;
1407         } else if (a->sign) {
1408             /* For two negative operands, invert the magnitude comparison. */
1409             cmp = -cmp;
1410         }
1411     }
1412
1413     if (flags & minmax_ismin) {
1414         cmp = -cmp;
1415     }
1416     return cmp < 0 ? b : a;
1417 }
1418
1419 /*
1420  * Floating point compare
1421  */
1422 static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1423                                      float_status *s, bool is_quiet)
1424 {
1425     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1426
1427     if (likely(ab_mask == float_cmask_normal)) {
1428         FloatRelation cmp;
1429
1430         if (a->sign != b->sign) {
1431             goto a_sign;
1432         }
1433         if (a->exp == b->exp) {
1434             cmp = frac_cmp(a, b);
1435         } else if (a->exp < b->exp) {
1436             cmp = float_relation_less;
1437         } else {
1438             cmp = float_relation_greater;
1439         }
1440         if (a->sign) {
1441             cmp = -cmp;
1442         }
1443         return cmp;
1444     }
1445
1446     if (unlikely(ab_mask & float_cmask_anynan)) {
1447         if (ab_mask & float_cmask_snan) {
1448             float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1449         } else if (!is_quiet) {
1450             float_raise(float_flag_invalid, s);
1451         }
1452         return float_relation_unordered;
1453     }
1454
1455     if (ab_mask & float_cmask_zero) {
1456         if (ab_mask == float_cmask_zero) {
1457             return float_relation_equal;
1458         } else if (a->cls == float_class_zero) {
1459             goto b_sign;
1460         } else {
1461             goto a_sign;
1462         }
1463     }
1464
1465     if (ab_mask == float_cmask_inf) {
1466         if (a->sign == b->sign) {
1467             return float_relation_equal;
1468         }
1469     } else if (b->cls == float_class_inf) {
1470         goto b_sign;
1471     } else {
1472         g_assert(a->cls == float_class_inf);
1473     }
1474
1475  a_sign:
1476     return a->sign ? float_relation_less : float_relation_greater;
1477  b_sign:
1478     return b->sign ? float_relation_greater : float_relation_less;
1479 }
1480
1481 /*
1482  * Multiply A by 2 raised to the power N.
1483  */
1484 static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1485 {
1486     switch (a->cls) {
1487     case float_class_snan:
1488     case float_class_qnan:
1489         parts_return_nan(a, s);
1490         break;
1491     case float_class_zero:
1492     case float_class_inf:
1493         break;
1494     case float_class_normal:
1495         a->exp += MIN(MAX(n, -0x10000), 0x10000);
1496         break;
1497     default:
1498         g_assert_not_reached();
1499     }
1500 }
1501
/*
 * Return log2(A)
 *
 * The result replaces *a in place.  NaN inputs are passed to
 * parts_return_nan.  Zero raises float_flag_divbyzero and yields -inf.
 * +inf is returned unchanged.  Negative inputs, including -inf, raise
 * float_flag_invalid and yield the default NaN.
 *
 * For a positive normal input the integer part of the result is the
 * input exponent; the fraction bits are generated one per loop
 * iteration by repeatedly squaring the input fraction.  fmt->frac_size
 * determines how many bits are generated.  Only N == 64 is supported,
 * which is asserted.
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            /* log2(-inf) is invalid; log2(+inf) is +inf, left as is. */
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    /* The logarithm of a negative number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    /* a_exp becomes the integer part of the result. */
    a_exp = a->exp;
    /* Exponent assigned to the fraction part before normalization. */
    f_exp = -1;

    /* r accumulates result fraction bits; t marks the next bit to set. */
    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    /* a0:a1 holds the running value being squared, high word first. */
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        /*
         * Square the running value, using the cheapest multiply that
         * covers the bits that are actually set.
         */
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            /* Only the upper 32 bits are set: a 32x32 product suffices. */
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Only the implicit bit is set: no more result bits to find. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            /* Top bit set after squaring: this result bit is 1. */
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            /* Top bit clear: double the value (a + a) and record a 0 bit. */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* Start from the integer part of the result. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Build the fraction part as a normalized value in f. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    /*
     * Combine integer and fraction parts: subtract for a negative
     * integer part, add for a positive one, or use the fraction alone
     * when the integer part is zero.
     */
    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}