fpu/softfloat-parts.c.inc

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 static void partsN(return_nan)(FloatPartsN *a, float_status *s)
  19 {
  20     switch (a->cls) {
  21     case float_class_snan:
  22         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  23         if (s->default_nan_mode) {
  24             parts_default_nan(a, s);
  25         } else {
  26             parts_silence_nan(a, s);
  27         }
  28         break;
  29     case float_class_qnan:
  30         if (s->default_nan_mode) {
  31             parts_default_nan(a, s);
  32         }
  33         break;
  34     default:
  35         g_assert_not_reached();
  36     }
  37 }
  38
  39 static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
  40                                      float_status *s)
  41 {
  42     if (is_snan(a->cls) || is_snan(b->cls)) {
  43         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  44     }
  45
  46     if (s->default_nan_mode) {
  47         parts_default_nan(a, s);
  48     } else {
  49         int cmp = frac_cmp(a, b);
  50         if (cmp == 0) {
  51             cmp = a->sign < b->sign;
  52         }
  53
  54         if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
  55             a = b;
  56         }
  57         if (is_snan(a->cls)) {
  58             parts_silence_nan(a, s);
  59         }
  60     }
  61     return a;
  62 }
  63
  64 static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
  65                                             FloatPartsN *c, float_status *s,
  66                                             int ab_mask, int abc_mask)
  67 {
  68     int which;
  69
  70     if (unlikely(abc_mask & float_cmask_snan)) {
  71         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  72     }
  73
  74     which = pickNaNMulAdd(a->cls, b->cls, c->cls,
  75                           ab_mask == float_cmask_infzero, s);
  76
  77     if (s->default_nan_mode || which == 3) {
  78         /*
  79          * Note that this check is after pickNaNMulAdd so that function
  80          * has an opportunity to set the Invalid flag for infzero.
  81          */
  82         parts_default_nan(a, s);
  83         return a;
  84     }
  85
  86     switch (which) {
  87     case 0:
  88         break;
  89     case 1:
  90         a = b;
  91         break;
  92     case 2:
  93         a = c;
  94         break;
  95     default:
  96         g_assert_not_reached();
  97     }
  98     if (is_snan(a->cls)) {
  99         parts_silence_nan(a, s);
 100     }
 101     return a;
 102 }
 103
 104 /*
 105  * Canonicalize the FloatParts structure.  Determine the class,
 106  * unbias the exponent, and normalize the fraction.
 107  */
 108 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
 109                                  const FloatFmt *fmt)
 110 {
 111     if (unlikely(p->exp == 0)) {
 112         if (likely(frac_eqz(p))) {
 113             p->cls = float_class_zero;
 114         } else if (status->flush_inputs_to_zero) {
 115             float_raise(float_flag_input_denormal, status);
 116             p->cls = float_class_zero;
 117             frac_clear(p);
 118         } else {
 119             int shift = frac_normalize(p);
 120             p->cls = float_class_normal;
 121             p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
 122         }
 123     } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
 124         p->cls = float_class_normal;
 125         p->exp -= fmt->exp_bias;
 126         frac_shl(p, fmt->frac_shift);
 127         p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
 128     } else if (likely(frac_eqz(p))) {
 129         p->cls = float_class_inf;
 130     } else {
 131         frac_shl(p, fmt->frac_shift);
 132         p->cls = (parts_is_snan_frac(p->frac_hi, status)
 133                   ? float_class_snan : float_class_qnan);
 134     }
 135 }
 136
/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 *
 * P is updated in place: on return P->exp holds the biased exponent
 * and the fraction has been shifted right by FRAC_SHIFT.  P->cls is
 * changed to float_class_inf on overflow to infinity and to
 * float_class_zero when the result is flushed or rounds to zero.
 * Exception flags are accumulated locally and raised once, at the end,
 * through float_raise().
 *
 * Four cases are distinguished on the biased exponent:
 *   - exp > 0: normal result, with overflow handling;
 *   - exp <= 0 with rebias_underflow: exponent re-biased by exp_re_bias;
 *   - exp <= 0 with flush_to_zero: result replaced by zero;
 *   - otherwise: the result is denormalized and rounded again.
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    /* Mask of the fraction bits that will be rounded away. */
    const uint64_t round_mask = fmt->round_mask;
    /* Least significant bit that is kept; wraps to 0 if round_mask is ~0. */
    const uint64_t frac_lsb = round_mask + 1;
    /*
     * Bit just below frac_lsb, i.e. one half unit in the last place
     * (presumes round_mask is a contiguous run of low bits).
     */
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    /* When set, overflow produces the largest normal instead of infinity. */
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * Select the rounding increment.  When N > 64 and frac_lsb == 0
     * the kept lsb is not in frac_lo at all; it is bit 0 of frac_hi.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            /* No increment for an exact tie when the kept lsb is even. */
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        /* Increment only when the kept lsb is currently even. */
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Normal result: round only if any discarded bit is set. */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            /*
             * A true return from frac_addi is handled as a carry out of
             * the fraction: renormalize and bump the exponent.
             */
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                /* Note: this replaces, rather than adds to, the flags. */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                /* Keep the rounded fraction, re-bias the exponent. */
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* Saturate to the largest finite value. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                /* Overflow to infinity. */
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        /* Underflow with re-biasing: round as a normal number. */
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        /* Denormal result flushed to zero. */
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /*
         * Denormal result.  Tininess is either detected before
         * rounding, or (for exp == 0 only) by checking whether adding
         * the rounding increment carries out of the fraction.
         */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /*
         * Denormalize (presumably a right shift that folds the
         * shifted-out bits into a sticky bit, going by the name).
         */
        frac_shrjam(p, 1 - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                /* The other modes do not depend on the shifted lsb. */
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Rounding may have carried into the implicit bit: exp becomes 1. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
        frac_shr(p, frac_shift);

        /* Underflow is only signaled for results that are also inexact. */
        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
 303
 304 static void partsN(uncanon)(FloatPartsN *p, float_status *s,
 305                             const FloatFmt *fmt)
 306 {
 307     if (likely(p->cls == float_class_normal)) {
 308         parts_uncanon_normal(p, s, fmt);
 309     } else {
 310         switch (p->cls) {
 311         case float_class_zero:
 312             p->exp = 0;
 313             frac_clear(p);
 314             return;
 315         case float_class_inf:
 316             g_assert(!fmt->arm_althp);
 317             p->exp = fmt->exp_max;
 318             frac_clear(p);
 319             return;
 320         case float_class_qnan:
 321         case float_class_snan:
 322             g_assert(!fmt->arm_althp);
 323             p->exp = fmt->exp_max;
 324             frac_shr(p, fmt->frac_shift);
 325             return;
 326         default:
 327             break;
 328         }
 329         g_assert_not_reached();
 330     }
 331 }
 332
 333 /*
 334  * Returns the result of adding or subtracting the values of the
 335  * floating-point values `a' and `b'. The operation is performed
 336  * according to the IEC/IEEE Standard for Binary Floating-Point
 337  * Arithmetic.
 338  */
 339 static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
 340                                    float_status *s, bool subtract)
 341 {
 342     bool b_sign = b->sign ^ subtract;
 343     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 344
 345     if (a->sign != b_sign) {
 346         /* Subtraction */
 347         if (likely(ab_mask == float_cmask_normal)) {
 348             if (parts_sub_normal(a, b)) {
 349                 return a;
 350             }
 351             /* Subtract was exact, fall through to set sign. */
 352             ab_mask = float_cmask_zero;
 353         }
 354
 355         if (ab_mask == float_cmask_zero) {
 356             a->sign = s->float_rounding_mode == float_round_down;
 357             return a;
 358         }
 359
 360         if (unlikely(ab_mask & float_cmask_anynan)) {
 361             goto p_nan;
 362         }
 363
 364         if (ab_mask & float_cmask_inf) {
 365             if (a->cls != float_class_inf) {
 366                 /* N - Inf */
 367                 goto return_b;
 368             }
 369             if (b->cls != float_class_inf) {
 370                 /* Inf - N */
 371                 return a;
 372             }
 373             /* Inf - Inf */
 374             float_raise(float_flag_invalid | float_flag_invalid_isi, s);
 375             parts_default_nan(a, s);
 376             return a;
 377         }
 378     } else {
 379         /* Addition */
 380         if (likely(ab_mask == float_cmask_normal)) {
 381             parts_add_normal(a, b);
 382             return a;
 383         }
 384
 385         if (ab_mask == float_cmask_zero) {
 386             return a;
 387         }
 388
 389         if (unlikely(ab_mask & float_cmask_anynan)) {
 390             goto p_nan;
 391         }
 392
 393         if (ab_mask & float_cmask_inf) {
 394             a->cls = float_class_inf;
 395             return a;
 396         }
 397     }
 398
 399     if (b->cls == float_class_zero) {
 400         g_assert(a->cls == float_class_normal);
 401         return a;
 402     }
 403
 404     g_assert(a->cls == float_class_zero);
 405     g_assert(b->cls == float_class_normal);
 406  return_b:
 407     b->sign = b_sign;
 408     return b;
 409
 410  p_nan:
 411     return parts_pick_nan(a, b, s);
 412 }
 413
 414 /*
 415  * Returns the result of multiplying the floating-point values `a' and
 416  * `b'. The operation is performed according to the IEC/IEEE Standard
 417  * for Binary Floating-Point Arithmetic.
 418  */
 419 static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
 420                                 float_status *s)
 421 {
 422     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 423     bool sign = a->sign ^ b->sign;
 424
 425     if (likely(ab_mask == float_cmask_normal)) {
 426         FloatPartsW tmp;
 427
 428         frac_mulw(&tmp, a, b);
 429         frac_truncjam(a, &tmp);
 430
 431         a->exp += b->exp + 1;
 432         if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
 433             frac_add(a, a, a);
 434             a->exp -= 1;
 435         }
 436
 437         a->sign = sign;
 438         return a;
 439     }
 440
 441     /* Inf * Zero == NaN */
 442     if (unlikely(ab_mask == float_cmask_infzero)) {
 443         float_raise(float_flag_invalid | float_flag_invalid_imz, s);
 444         parts_default_nan(a, s);
 445         return a;
 446     }
 447
 448     if (unlikely(ab_mask & float_cmask_anynan)) {
 449         return parts_pick_nan(a, b, s);
 450     }
 451
 452     /* Multiply by 0 or Inf */
 453     if (ab_mask & float_cmask_inf) {
 454         a->cls = float_class_inf;
 455         a->sign = sign;
 456         return a;
 457     }
 458
 459     g_assert(ab_mask & float_cmask_zero);
 460     a->cls = float_class_zero;
 461     a->sign = sign;
 462     return a;
 463 }
 464
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 *
 * The result is written into A, which is returned except when
 * parts_pick_nan_muladd() selects another operand.  The signs of A and
 * C may be modified along the way.  Flag bits examined here:
 * float_muladd_negate_c, float_muladd_negate_product,
 * float_muladd_halve_result and float_muladd_negate_result.
 * NaN results (both propagated and default) are returned without
 * applying any of the negation flags.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    /* At least one of A and B is zero or infinity (NaNs are gone). */
    if (unlikely(ab_mask != float_cmask_normal)) {
        /* Inf * 0: invalid, regardless of C. */
        if (unlikely(ab_mask == float_cmask_infzero)) {
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        /* Infinite product: invalid only if C is the opposite infinity. */
        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        /* Zero product: the result is determined by C alone. */
        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        /* Only an infinite C is left; it is handled just below. */
        g_assert(c->cls == float_class_inf);
    }

    /* Finite product plus infinite addend: the result is C's infinity. */
    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    /* Top bit clear: double the fraction to renormalize. */
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            /* The product and the addend cancelled exactly. */
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

    /* Normal result: optional halving, then optional final negation. */
 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

    /* Zero from cancellation: negative only when rounding down. */
 return_sub_zero:
    a->sign = s->float_rounding_mode == float_round_down;
    /* Zero result that keeps the sign already in A. */
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

    /* Default NaN: returned without applying float_muladd_negate_result. */
 d_nan:
    parts_default_nan(a, s);
    return a;
}
 591
 592 /*
 593  * Returns the result of dividing the floating-point value `a' by the
 594  * corresponding value `b'. The operation is performed according to
 595  * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 596  */
 597 static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
 598                                 float_status *s)
 599 {
 600     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 601     bool sign = a->sign ^ b->sign;
 602
 603     if (likely(ab_mask == float_cmask_normal)) {
 604         a->sign = sign;
 605         a->exp -= b->exp + frac_div(a, b);
 606         return a;
 607     }
 608
 609     /* 0/0 or Inf/Inf => NaN */
 610     if (unlikely(ab_mask == float_cmask_zero)) {
 611         float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
 612         goto d_nan;
 613     }
 614     if (unlikely(ab_mask == float_cmask_inf)) {
 615         float_raise(float_flag_invalid | float_flag_invalid_idi, s);
 616         goto d_nan;
 617     }
 618
 619     /* All the NaN cases */
 620     if (unlikely(ab_mask & float_cmask_anynan)) {
 621         return parts_pick_nan(a, b, s);
 622     }
 623
 624     a->sign = sign;
 625
 626     /* Inf / X */
 627     if (a->cls == float_class_inf) {
 628         return a;
 629     }
 630
 631     /* 0 / X */
 632     if (a->cls == float_class_zero) {
 633         return a;
 634     }
 635
 636     /* X / Inf */
 637     if (b->cls == float_class_inf) {
 638         a->cls = float_class_zero;
 639         return a;
 640     }
 641
 642     /* X / 0 => Inf */
 643     g_assert(b->cls == float_class_zero);
 644     float_raise(float_flag_divbyzero, s);
 645     a->cls = float_class_inf;
 646     return a;
 647
 648  d_nan:
 649     parts_default_nan(a, s);
 650     return a;
 651 }
 652
 653 /*
 654  * Floating point remainder, per IEC/IEEE, or modulus.
 655  */
 656 static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
 657                                    uint64_t *mod_quot, float_status *s)
 658 {
 659     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 660
 661     if (likely(ab_mask == float_cmask_normal)) {
 662         frac_modrem(a, b, mod_quot);
 663         return a;
 664     }
 665
 666     if (mod_quot) {
 667         *mod_quot = 0;
 668     }
 669
 670     /* All the NaN cases */
 671     if (unlikely(ab_mask & float_cmask_anynan)) {
 672         return parts_pick_nan(a, b, s);
 673     }
 674
 675     /* Inf % N; N % 0 */
 676     if (a->cls == float_class_inf || b->cls == float_class_zero) {
 677         float_raise(float_flag_invalid, s);
 678         parts_default_nan(a, s);
 679         return a;
 680     }
 681
 682     /* N % Inf; 0 % N */
 683     g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
 684     return a;
 685 }
 686
/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 *
 * A is replaced in place by its (not yet rounded) square root.
 * FMT->frac_size selects the float32-or-smaller path within the
 * N == 64 instantiation; for other N the 128-bit path is used.
 *
 * Special cases:
 *   - NaN inputs are handled by parts_return_nan();
 *   - zeros of either sign and +Inf are returned unchanged;
 *   - -Inf and negative normal numbers raise
 *     float_flag_invalid | float_flag_invalid_sqrt and give the
 *     default NaN.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    /* The constant 3 in the 2.30 and 2.62 fixed point formats. */
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;                            /* unused low product words */
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            /* Returned as is, whatever its sign. */
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    /* Square root of a negative normal number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    /* Table index: 6 fraction bits below the top bit, plus exponent parity. */
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    /* First Newton-Raphson step, shared by all the paths below. */
    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    /* N != 64: continue refining in 64-bit, then 128-bit, arithmetic. */
    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    /* Double the 128-bit r by adding it to itself. */
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Top bit clear: double the fraction to renormalize. */
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}
 892
 893 /*
 894  * Rounds the floating-point value `a' to an integer, and returns the
 895  * result as a floating-point value. The operation is performed
 896  * according to the IEC/IEEE Standard for Binary Floating-Point
 897  * Arithmetic.
 898  *
 899  * parts_round_to_int_normal is an internal helper function for
 900  * normal numbers only, returning true for inexact but not directly
 901  * raising float_flag_inexact.
 902  */
 903 static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
 904                                         int scale, int frac_size)
 905 {
 906     uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
 907     int shift_adj;
 908
 909     scale = MIN(MAX(scale, -0x10000), 0x10000);
 910     a->exp += scale;
 911
 912     if (a->exp < 0) {
 913         bool one;
 914
 915         /* All fractional */
 916         switch (rmode) {
 917         case float_round_nearest_even:
 918             one = false;
 919             if (a->exp == -1) {
 920                 FloatPartsN tmp;
 921                 /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
 922                 frac_add(&tmp, a, a);
 923                 /* Anything remaining means frac > 0.5. */
 924                 one = !frac_eqz(&tmp);
 925             }
 926             break;
 927         case float_round_ties_away:
 928             one = a->exp == -1;
 929             break;
 930         case float_round_to_zero:
 931             one = false;
 932             break;
 933         case float_round_up:
 934             one = !a->sign;
 935             break;
 936         case float_round_down:
 937             one = a->sign;
 938             break;
 939         case float_round_to_odd:
 940             one = true;
 941             break;
 942         default:
 943             g_assert_not_reached();
 944         }
 945
 946         frac_clear(a);
 947         a->exp = 0;
 948         if (one) {
 949             a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
 950         } else {
 951             a->cls = float_class_zero;
 952         }
 953         return true;
 954     }
 955
 956     if (a->exp >= frac_size) {
 957         /* All integral */
 958         return false;
 959     }
 960
 961     if (N > 64 && a->exp < N - 64) {
 962         /*
 963          * Rounding is not in the low word -- shift lsb to bit 2,
 964          * which leaves room for sticky and rounding bit.
 965          */
 966         shift_adj = (N - 1) - (a->exp + 2);
 967         frac_shrjam(a, shift_adj);
 968         frac_lsb = 1 << 2;
 969     } else {
 970         shift_adj = 0;
 971         frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
 972     }
 973
 974     frac_lsbm1 = frac_lsb >> 1;
 975     rnd_mask = frac_lsb - 1;
 976     rnd_even_mask = rnd_mask | frac_lsb;
 977
 978     if (!(a->frac_lo & rnd_mask)) {
 979         /* Fractional bits already clear, undo the shift above. */
 980         frac_shl(a, shift_adj);
 981         return false;
 982     }
 983
 984     switch (rmode) {
 985     case float_round_nearest_even:
 986         inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
 987         break;
 988     case float_round_ties_away:
 989         inc = frac_lsbm1;
 990         break;
 991     case float_round_to_zero:
 992         inc = 0;
 993         break;
 994     case float_round_up:
 995         inc = a->sign ? 0 : rnd_mask;
 996         break;
 997     case float_round_down:
 998         inc = a->sign ? rnd_mask : 0;
 999         break;
1000     case float_round_to_odd:
1001         inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
1002         break;
1003     default:
1004         g_assert_not_reached();
1005     }
1006
1007     if (shift_adj == 0) {
1008         if (frac_addi(a, a, inc)) {
1009             frac_shr(a, 1);
1010             a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
1011             a->exp++;
1012         }
1013         a->frac_lo &= ~rnd_mask;
1014     } else {
1015         frac_addi(a, a, inc);
1016         a->frac_lo &= ~rnd_mask;
1017         /* Be careful shifting back, not to overflow */
1018         frac_shl(a, shift_adj - 1);
1019         if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
1020             a->exp++;
1021         } else {
1022             frac_add(a, a, a);
1023         }
1024     }
1025     return true;
1026 }
1027
1028 static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1029                                  int scale, float_status *s,
1030                                  const FloatFmt *fmt)
1031 {
1032     switch (a->cls) {
1033     case float_class_qnan:
1034     case float_class_snan:
1035         parts_return_nan(a, s);
1036         break;
1037     case float_class_zero:
1038     case float_class_inf:
1039         break;
1040     case float_class_normal:
1041         if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1042             float_raise(float_flag_inexact, s);
1043         }
1044         break;
1045     default:
1046         g_assert_not_reached();
1047     }
1048 }
1049
1050 /*
1051  * Returns the result of converting the floating-point value `a' to
1052  * the two's complement integer format. The conversion is performed
1053  * according to the IEC/IEEE Standard for Binary Floating-Point
1054  * Arithmetic---which means in particular that the conversion is
1055  * rounded according to the current rounding mode. If `a' is a NaN,
1056  * the largest positive integer is returned. Otherwise, if the
1057  * conversion overflows, the largest integer with the same sign as `a'
1058  * is returned.
1059  */
1060 static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1061                                      int scale, int64_t min, int64_t max,
1062                                      float_status *s)
1063 {
1064     int flags = 0;
1065     uint64_t r;
1066
1067     switch (p->cls) {
1068     case float_class_snan:
1069         flags |= float_flag_invalid_snan;
1070         /* fall through */
1071     case float_class_qnan:
1072         flags |= float_flag_invalid;
1073         r = max;
1074         break;
1075
1076     case float_class_inf:
1077         flags = float_flag_invalid | float_flag_invalid_cvti;
1078         r = p->sign ? min : max;
1079         break;
1080
1081     case float_class_zero:
1082         return 0;
1083
1084     case float_class_normal:
1085         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1086         if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1087             flags = float_flag_inexact;
1088         }
1089
1090         if (p->exp <= DECOMPOSED_BINARY_POINT) {
1091             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1092         } else {
1093             r = UINT64_MAX;
1094         }
1095         if (p->sign) {
1096             if (r <= -(uint64_t)min) {
1097                 r = -r;
1098             } else {
1099                 flags = float_flag_invalid | float_flag_invalid_cvti;
1100                 r = min;
1101             }
1102         } else if (r > max) {
1103             flags = float_flag_invalid | float_flag_invalid_cvti;
1104             r = max;
1105         }
1106         break;
1107
1108     default:
1109         g_assert_not_reached();
1110     }
1111
1112     float_raise(flags, s);
1113     return r;
1114 }
1115
1116 /*
1117  *  Returns the result of converting the floating-point value `a' to
1118  *  the unsigned integer format. The conversion is performed according
1119  *  to the IEC/IEEE Standard for Binary Floating-Point
1120  *  Arithmetic---which means in particular that the conversion is
1121  *  rounded according to the current rounding mode. If `a' is a NaN,
1122  *  the largest unsigned integer is returned. Otherwise, if the
1123  *  conversion overflows, the largest unsigned integer is returned. If
1124  *  the 'a' is negative, the result is rounded and zero is returned;
1125  *  values that do not round to zero will raise the inexact exception
1126  *  flag.
1127  */
1128 static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1129                                       int scale, uint64_t max, float_status *s)
1130 {
1131     int flags = 0;
1132     uint64_t r;
1133
1134     switch (p->cls) {
1135     case float_class_snan:
1136         flags |= float_flag_invalid_snan;
1137         /* fall through */
1138     case float_class_qnan:
1139         flags |= float_flag_invalid;
1140         r = max;
1141         break;
1142
1143     case float_class_inf:
1144         flags = float_flag_invalid | float_flag_invalid_cvti;
1145         r = p->sign ? 0 : max;
1146         break;
1147
1148     case float_class_zero:
1149         return 0;
1150
1151     case float_class_normal:
1152         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1153         if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1154             flags = float_flag_inexact;
1155             if (p->cls == float_class_zero) {
1156                 r = 0;
1157                 break;
1158             }
1159         }
1160
1161         if (p->sign) {
1162             flags = float_flag_invalid | float_flag_invalid_cvti;
1163             r = 0;
1164         } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1165             flags = float_flag_invalid | float_flag_invalid_cvti;
1166             r = max;
1167         } else {
1168             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1169             if (r > max) {
1170                 flags = float_flag_invalid | float_flag_invalid_cvti;
1171                 r = max;
1172             }
1173         }
1174         break;
1175
1176     default:
1177         g_assert_not_reached();
1178     }
1179
1180     float_raise(flags, s);
1181     return r;
1182 }
1183
1184 /*
1185  * Integer to float conversions
1186  *
1187  * Returns the result of converting the two's complement integer `a'
1188  * to the floating-point format. The conversion is performed according
1189  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1190  */
1191 static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1192                                   int scale, float_status *s)
1193 {
1194     uint64_t f = a;
1195     int shift;
1196
1197     memset(p, 0, sizeof(*p));
1198
1199     if (a == 0) {
1200         p->cls = float_class_zero;
1201         return;
1202     }
1203
1204     p->cls = float_class_normal;
1205     if (a < 0) {
1206         f = -f;
1207         p->sign = true;
1208     }
1209     shift = clz64(f);
1210     scale = MIN(MAX(scale, -0x10000), 0x10000);
1211
1212     p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1213     p->frac_hi = f << shift;
1214 }
1215
1216 /*
1217  * Unsigned Integer to float conversions
1218  *
1219  * Returns the result of converting the unsigned integer `a' to the
1220  * floating-point format. The conversion is performed according to the
1221  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1222  */
1223 static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1224                                   int scale, float_status *status)
1225 {
1226     memset(p, 0, sizeof(*p));
1227
1228     if (a == 0) {
1229         p->cls = float_class_zero;
1230     } else {
1231         int shift = clz64(a);
1232         scale = MIN(MAX(scale, -0x10000), 0x10000);
1233         p->cls = float_class_normal;
1234         p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1235         p->frac_hi = a << shift;
1236     }
1237 }
1238
1239 /*
1240  * Float min/max.
1241  */
1242 static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1243                                    float_status *s, int flags)
1244 {
1245     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1246     int a_exp, b_exp, cmp;
1247
1248     if (unlikely(ab_mask & float_cmask_anynan)) {
1249         /*
1250          * For minNum/maxNum (IEEE 754-2008)
1251          * or minimumNumber/maximumNumber (IEEE 754-2019),
1252          * if one operand is a QNaN, and the other
1253          * operand is numerical, then return numerical argument.
1254          */
1255         if ((flags & (minmax_isnum | minmax_isnumber))
1256             && !(ab_mask & float_cmask_snan)
1257             && (ab_mask & ~float_cmask_qnan)) {
1258             return is_nan(a->cls) ? b : a;
1259         }
1260
1261         /*
1262          * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1263          * are removed and replaced with minimum, minimumNumber, maximum
1264          * and maximumNumber.
1265          * minimumNumber/maximumNumber behavior for SNaN is changed to:
1266          *   If both operands are NaNs, a QNaN is returned.
1267          *   If either operand is a SNaN,
1268          *   an invalid operation exception is signaled,
1269          *   but unless both operands are NaNs,
1270          *   the SNaN is otherwise ignored and not converted to a QNaN.
1271          */
1272         if ((flags & minmax_isnumber)
1273             && (ab_mask & float_cmask_snan)
1274             && (ab_mask & ~float_cmask_anynan)) {
1275             float_raise(float_flag_invalid, s);
1276             return is_nan(a->cls) ? b : a;
1277         }
1278
1279         return parts_pick_nan(a, b, s);
1280     }
1281
1282     a_exp = a->exp;
1283     b_exp = b->exp;
1284
1285     if (unlikely(ab_mask != float_cmask_normal)) {
1286         switch (a->cls) {
1287         case float_class_normal:
1288             break;
1289         case float_class_inf:
1290             a_exp = INT16_MAX;
1291             break;
1292         case float_class_zero:
1293             a_exp = INT16_MIN;
1294             break;
1295         default:
1296             g_assert_not_reached();
1297             break;
1298         }
1299         switch (b->cls) {
1300         case float_class_normal:
1301             break;
1302         case float_class_inf:
1303             b_exp = INT16_MAX;
1304             break;
1305         case float_class_zero:
1306             b_exp = INT16_MIN;
1307             break;
1308         default:
1309             g_assert_not_reached();
1310             break;
1311         }
1312     }
1313
1314     /* Compare magnitudes. */
1315     cmp = a_exp - b_exp;
1316     if (cmp == 0) {
1317         cmp = frac_cmp(a, b);
1318     }
1319
1320     /*
1321      * Take the sign into account.
1322      * For ismag, only do this if the magnitudes are equal.
1323      */
1324     if (!(flags & minmax_ismag) || cmp == 0) {
1325         if (a->sign != b->sign) {
1326             /* For differing signs, the negative operand is less. */
1327             cmp = a->sign ? -1 : 1;
1328         } else if (a->sign) {
1329             /* For two negative operands, invert the magnitude comparison. */
1330             cmp = -cmp;
1331         }
1332     }
1333
1334     if (flags & minmax_ismin) {
1335         cmp = -cmp;
1336     }
1337     return cmp < 0 ? b : a;
1338 }
1339
1340 /*
1341  * Floating point compare
1342  */
1343 static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1344                                      float_status *s, bool is_quiet)
1345 {
1346     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1347
1348     if (likely(ab_mask == float_cmask_normal)) {
1349         FloatRelation cmp;
1350
1351         if (a->sign != b->sign) {
1352             goto a_sign;
1353         }
1354         if (a->exp == b->exp) {
1355             cmp = frac_cmp(a, b);
1356         } else if (a->exp < b->exp) {
1357             cmp = float_relation_less;
1358         } else {
1359             cmp = float_relation_greater;
1360         }
1361         if (a->sign) {
1362             cmp = -cmp;
1363         }
1364         return cmp;
1365     }
1366
1367     if (unlikely(ab_mask & float_cmask_anynan)) {
1368         if (ab_mask & float_cmask_snan) {
1369             float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1370         } else if (!is_quiet) {
1371             float_raise(float_flag_invalid, s);
1372         }
1373         return float_relation_unordered;
1374     }
1375
1376     if (ab_mask & float_cmask_zero) {
1377         if (ab_mask == float_cmask_zero) {
1378             return float_relation_equal;
1379         } else if (a->cls == float_class_zero) {
1380             goto b_sign;
1381         } else {
1382             goto a_sign;
1383         }
1384     }
1385
1386     if (ab_mask == float_cmask_inf) {
1387         if (a->sign == b->sign) {
1388             return float_relation_equal;
1389         }
1390     } else if (b->cls == float_class_inf) {
1391         goto b_sign;
1392     } else {
1393         g_assert(a->cls == float_class_inf);
1394     }
1395
1396  a_sign:
1397     return a->sign ? float_relation_less : float_relation_greater;
1398  b_sign:
1399     return b->sign ? float_relation_greater : float_relation_less;
1400 }
1401
1402 /*
1403  * Multiply A by 2 raised to the power N.
1404  */
1405 static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1406 {
1407     switch (a->cls) {
1408     case float_class_snan:
1409     case float_class_qnan:
1410         parts_return_nan(a, s);
1411         break;
1412     case float_class_zero:
1413     case float_class_inf:
1414         break;
1415     case float_class_normal:
1416         a->exp += MIN(MAX(n, -0x10000), 0x10000);
1417         break;
1418     default:
1419         g_assert_not_reached();
1420     }
1421 }
1422
/*
 * Return log2(A)
 *
 * The result replaces `a' in place.  Special cases:
 *   - NaN:              handled by parts_return_nan.
 *   - zero:             raises float_flag_divbyzero, result is -inf.
 *   - +inf:             returned unchanged.
 *   - negative operand: raises float_flag_invalid, result is the
 *                       default NaN.
 *
 * fmt->frac_size determines how many result bits are computed.
 * Only N == 64 is supported; this is asserted below.
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            /* log2(-inf) is invalid; log2(+inf) = +inf, left as is. */
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    /* log2 of a negative normal number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    /*
     * a_exp becomes the integer part of the result.  The fraction bits
     * are accumulated in r, one per loop iteration at the position
     * marked by t; f_exp is the exponent later given to that fraction.
     */
    a_exp = a->exp;
    f_exp = -1;

    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    /* a0:a1 is the 128-bit working value, seeded with the fraction. */
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        /* Square the working value, using the cheapest form that fits. */
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            /* Low 32 bits are zero: square just the high half. */
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* No bits besides the implicit bit remain: nothing more to add. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                /* Top bit set after squaring: this result bit is 1. */
                r |= t;
            }
        } else {
            /* Top bit clear: result bit is 0; double a0:a1 for the next round. */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* Start from the integer part of the result. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        /* No fraction bits were produced: the integer part is the answer. */
        return;
    }

    /* Build the fraction as a normalized value f. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    /*
     * Combine the integer part with the fraction.  When the integer
     * part is zero the fraction alone is the result.
     */
    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}