fpu/softfloat-parts.c.inc

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 static void partsN(return_nan)(FloatPartsN *a, float_status *s)
  19 {
  20     switch (a->cls) {
  21     case float_class_snan:
  22         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  23         if (s->default_nan_mode) {
  24             parts_default_nan(a, s);
  25         } else {
  26             parts_silence_nan(a, s);
  27         }
  28         break;
  29     case float_class_qnan:
  30         if (s->default_nan_mode) {
  31             parts_default_nan(a, s);
  32         }
  33         break;
  34     default:
  35         g_assert_not_reached();
  36     }
  37 }
  38
  39 static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
  40                                      float_status *s)
  41 {
  42     if (is_snan(a->cls) || is_snan(b->cls)) {
  43         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  44     }
  45
  46     if (s->default_nan_mode) {
  47         parts_default_nan(a, s);
  48     } else {
  49         int cmp = frac_cmp(a, b);
  50         if (cmp == 0) {
  51             cmp = a->sign < b->sign;
  52         }
  53
  54         if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
  55             a = b;
  56         }
  57         if (is_snan(a->cls)) {
  58             parts_silence_nan(a, s);
  59         }
  60     }
  61     return a;
  62 }
  63
  64 static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
  65                                             FloatPartsN *c, float_status *s,
  66                                             int ab_mask, int abc_mask)
  67 {
  68     int which;
  69
  70     if (unlikely(abc_mask & float_cmask_snan)) {
  71         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  72     }
  73
  74     which = pickNaNMulAdd(a->cls, b->cls, c->cls,
  75                           ab_mask == float_cmask_infzero, s);
  76
  77     if (s->default_nan_mode || which == 3) {
  78         /*
  79          * Note that this check is after pickNaNMulAdd so that function
  80          * has an opportunity to set the Invalid flag for infzero.
  81          */
  82         parts_default_nan(a, s);
  83         return a;
  84     }
  85
  86     switch (which) {
  87     case 0:
  88         break;
  89     case 1:
  90         a = b;
  91         break;
  92     case 2:
  93         a = c;
  94         break;
  95     default:
  96         g_assert_not_reached();
  97     }
  98     if (is_snan(a->cls)) {
  99         parts_silence_nan(a, s);
 100     }
 101     return a;
 102 }
 103
 104 /*
 105  * Canonicalize the FloatParts structure.  Determine the class,
 106  * unbias the exponent, and normalize the fraction.
 107  */
 108 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
 109                                  const FloatFmt *fmt)
 110 {
 111     if (unlikely(p->exp == 0)) {
 112         if (likely(frac_eqz(p))) {
 113             p->cls = float_class_zero;
 114         } else if (status->flush_inputs_to_zero) {
 115             float_raise(float_flag_input_denormal, status);
 116             p->cls = float_class_zero;
 117             frac_clear(p);
 118         } else {
 119             int shift = frac_normalize(p);
 120             p->cls = float_class_normal;
 121             p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
 122         }
 123     } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
 124         p->cls = float_class_normal;
 125         p->exp -= fmt->exp_bias;
 126         frac_shl(p, fmt->frac_shift);
 127         p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
 128     } else if (likely(frac_eqz(p))) {
 129         p->cls = float_class_inf;
 130     } else {
 131         frac_shl(p, fmt->frac_shift);
 132         p->cls = (parts_is_snan_frac(p->frac_hi, status)
 133                   ? float_class_snan : float_class_qnan);
 134     }
 135 }
 136
 137 /*
 138  * Round and uncanonicalize a floating-point number by parts. There
 139  * are FRAC_SHIFT bits that may require rounding at the bottom of the
 140  * fraction; these bits will be removed. The exponent will be biased
 141  * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 142  */
 143 static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
 144                                    const FloatFmt *fmt)
 145 {
 146     const int exp_max = fmt->exp_max;
 147     const int frac_shift = fmt->frac_shift;
 148     const uint64_t round_mask = fmt->round_mask;
 149     const uint64_t frac_lsb = round_mask + 1;
 150     const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
 151     const uint64_t roundeven_mask = round_mask | frac_lsb;
 152     uint64_t inc;
 153     bool overflow_norm = false;
 154     int exp, flags = 0;
 155
 156     switch (s->float_rounding_mode) {
 157     case float_round_nearest_even:
 158         if (N > 64 && frac_lsb == 0) {
 159             inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
 160                    ? frac_lsbm1 : 0);
 161         } else {
 162             inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
 163                    ? frac_lsbm1 : 0);
 164         }
 165         break;
 166     case float_round_ties_away:
 167         inc = frac_lsbm1;
 168         break;
 169     case float_round_to_zero:
 170         overflow_norm = true;
 171         inc = 0;
 172         break;
 173     case float_round_up:
 174         inc = p->sign ? 0 : round_mask;
 175         overflow_norm = p->sign;
 176         break;
 177     case float_round_down:
 178         inc = p->sign ? round_mask : 0;
 179         overflow_norm = !p->sign;
 180         break;
 181     case float_round_to_odd:
 182         overflow_norm = true;
 183         /* fall through */
 184     case float_round_to_odd_inf:
 185         if (N > 64 && frac_lsb == 0) {
 186             inc = p->frac_hi & 1 ? 0 : round_mask;
 187         } else {
 188             inc = p->frac_lo & frac_lsb ? 0 : round_mask;
 189         }
 190         break;
 191     default:
 192         g_assert_not_reached();
 193     }
 194
 195     exp = p->exp + fmt->exp_bias;
 196     if (likely(exp > 0)) {
 197         if (p->frac_lo & round_mask) {
 198             flags |= float_flag_inexact;
 199             if (frac_addi(p, p, inc)) {
 200                 frac_shr(p, 1);
 201                 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
 202                 exp++;
 203             }
 204             p->frac_lo &= ~round_mask;
 205         }
 206
 207         if (fmt->arm_althp) {
 208             /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
 209             if (unlikely(exp > exp_max)) {
 210                 /* Overflow.  Return the maximum normal.  */
 211                 flags = float_flag_invalid;
 212                 exp = exp_max;
 213                 frac_allones(p);
 214                 p->frac_lo &= ~round_mask;
 215             }
 216         } else if (unlikely(exp >= exp_max)) {
 217             flags |= float_flag_overflow | float_flag_inexact;
 218             if (overflow_norm) {
 219                 exp = exp_max - 1;
 220                 frac_allones(p);
 221                 p->frac_lo &= ~round_mask;
 222             } else {
 223                 p->cls = float_class_inf;
 224                 exp = exp_max;
 225                 frac_clear(p);
 226             }
 227         }
 228         frac_shr(p, frac_shift);
 229     } else if (s->flush_to_zero) {
 230         flags |= float_flag_output_denormal;
 231         p->cls = float_class_zero;
 232         exp = 0;
 233         frac_clear(p);
 234     } else {
 235         bool is_tiny = s->tininess_before_rounding || exp < 0;
 236
 237         if (!is_tiny) {
 238             FloatPartsN discard;
 239             is_tiny = !frac_addi(&discard, p, inc);
 240         }
 241
 242         frac_shrjam(p, 1 - exp);
 243
 244         if (p->frac_lo & round_mask) {
 245             /* Need to recompute round-to-even/round-to-odd. */
 246             switch (s->float_rounding_mode) {
 247             case float_round_nearest_even:
 248                 if (N > 64 && frac_lsb == 0) {
 249                     inc = ((p->frac_hi & 1) ||
 250                            (p->frac_lo & round_mask) != frac_lsbm1
 251                            ? frac_lsbm1 : 0);
 252                 } else {
 253                     inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
 254                            ? frac_lsbm1 : 0);
 255                 }
 256                 break;
 257             case float_round_to_odd:
 258             case float_round_to_odd_inf:
 259                 if (N > 64 && frac_lsb == 0) {
 260                     inc = p->frac_hi & 1 ? 0 : round_mask;
 261                 } else {
 262                     inc = p->frac_lo & frac_lsb ? 0 : round_mask;
 263                 }
 264                 break;
 265             default:
 266                 break;
 267             }
 268             flags |= float_flag_inexact;
 269             frac_addi(p, p, inc);
 270             p->frac_lo &= ~round_mask;
 271         }
 272
 273         exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
 274         frac_shr(p, frac_shift);
 275
 276         if (is_tiny && (flags & float_flag_inexact)) {
 277             flags |= float_flag_underflow;
 278         }
 279         if (exp == 0 && frac_eqz(p)) {
 280             p->cls = float_class_zero;
 281         }
 282     }
 283     p->exp = exp;
 284     float_raise(flags, s);
 285 }
 286
 287 static void partsN(uncanon)(FloatPartsN *p, float_status *s,
 288                             const FloatFmt *fmt)
 289 {
 290     if (likely(p->cls == float_class_normal)) {
 291         parts_uncanon_normal(p, s, fmt);
 292     } else {
 293         switch (p->cls) {
 294         case float_class_zero:
 295             p->exp = 0;
 296             frac_clear(p);
 297             return;
 298         case float_class_inf:
 299             g_assert(!fmt->arm_althp);
 300             p->exp = fmt->exp_max;
 301             frac_clear(p);
 302             return;
 303         case float_class_qnan:
 304         case float_class_snan:
 305             g_assert(!fmt->arm_althp);
 306             p->exp = fmt->exp_max;
 307             frac_shr(p, fmt->frac_shift);
 308             return;
 309         default:
 310             break;
 311         }
 312         g_assert_not_reached();
 313     }
 314 }
 315
 316 /*
 317  * Returns the result of adding or subtracting the values of the
 318  * floating-point values `a' and `b'. The operation is performed
 319  * according to the IEC/IEEE Standard for Binary Floating-Point
 320  * Arithmetic.
 321  */
 322 static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
 323                                    float_status *s, bool subtract)
 324 {
 325     bool b_sign = b->sign ^ subtract;
 326     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 327
 328     if (a->sign != b_sign) {
 329         /* Subtraction */
 330         if (likely(ab_mask == float_cmask_normal)) {
 331             if (parts_sub_normal(a, b)) {
 332                 return a;
 333             }
 334             /* Subtract was exact, fall through to set sign. */
 335             ab_mask = float_cmask_zero;
 336         }
 337
 338         if (ab_mask == float_cmask_zero) {
 339             a->sign = s->float_rounding_mode == float_round_down;
 340             return a;
 341         }
 342
 343         if (unlikely(ab_mask & float_cmask_anynan)) {
 344             goto p_nan;
 345         }
 346
 347         if (ab_mask & float_cmask_inf) {
 348             if (a->cls != float_class_inf) {
 349                 /* N - Inf */
 350                 goto return_b;
 351             }
 352             if (b->cls != float_class_inf) {
 353                 /* Inf - N */
 354                 return a;
 355             }
 356             /* Inf - Inf */
 357             float_raise(float_flag_invalid | float_flag_invalid_isi, s);
 358             parts_default_nan(a, s);
 359             return a;
 360         }
 361     } else {
 362         /* Addition */
 363         if (likely(ab_mask == float_cmask_normal)) {
 364             parts_add_normal(a, b);
 365             return a;
 366         }
 367
 368         if (ab_mask == float_cmask_zero) {
 369             return a;
 370         }
 371
 372         if (unlikely(ab_mask & float_cmask_anynan)) {
 373             goto p_nan;
 374         }
 375
 376         if (ab_mask & float_cmask_inf) {
 377             a->cls = float_class_inf;
 378             return a;
 379         }
 380     }
 381
 382     if (b->cls == float_class_zero) {
 383         g_assert(a->cls == float_class_normal);
 384         return a;
 385     }
 386
 387     g_assert(a->cls == float_class_zero);
 388     g_assert(b->cls == float_class_normal);
 389  return_b:
 390     b->sign = b_sign;
 391     return b;
 392
 393  p_nan:
 394     return parts_pick_nan(a, b, s);
 395 }
 396
 397 /*
 398  * Returns the result of multiplying the floating-point values `a' and
 399  * `b'. The operation is performed according to the IEC/IEEE Standard
 400  * for Binary Floating-Point Arithmetic.
 401  */
 402 static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
 403                                 float_status *s)
 404 {
 405     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 406     bool sign = a->sign ^ b->sign;
 407
 408     if (likely(ab_mask == float_cmask_normal)) {
 409         FloatPartsW tmp;
 410
 411         frac_mulw(&tmp, a, b);
 412         frac_truncjam(a, &tmp);
 413
 414         a->exp += b->exp + 1;
 415         if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
 416             frac_add(a, a, a);
 417             a->exp -= 1;
 418         }
 419
 420         a->sign = sign;
 421         return a;
 422     }
 423
 424     /* Inf * Zero == NaN */
 425     if (unlikely(ab_mask == float_cmask_infzero)) {
 426         float_raise(float_flag_invalid | float_flag_invalid_imz, s);
 427         parts_default_nan(a, s);
 428         return a;
 429     }
 430
 431     if (unlikely(ab_mask & float_cmask_anynan)) {
 432         return parts_pick_nan(a, b, s);
 433     }
 434
 435     /* Multiply by 0 or Inf */
 436     if (ab_mask & float_cmask_inf) {
 437         a->cls = float_class_inf;
 438         a->sign = sign;
 439         return a;
 440     }
 441
 442     g_assert(ab_mask & float_cmask_zero);
 443     a->cls = float_class_zero;
 444     a->sign = sign;
 445     return a;
 446 }
 447
 448 /*
 449  * Returns the result of multiplying the floating-point values `a' and
 450  * `b' then adding 'c', with no intermediate rounding step after the
 451  * multiplication. The operation is performed according to the
 452  * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 453  * The flags argument allows the caller to select negation of the
 454  * addend, the intermediate product, or the final result. (The
 455  * difference between this and having the caller do a separate
 456  * negation is that negating externally will flip the sign bit on NaNs.)
 457  *
 458  * Requires A and C extracted into a double-sized structure to provide the
 459  * extra space for the widening multiply.
 460  */
 461 static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
 462                                    FloatPartsN *c, int flags, float_status *s)
 463 {
 464     int ab_mask, abc_mask;
 465     FloatPartsW p_widen, c_widen;
 466
 467     ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 468     abc_mask = float_cmask(c->cls) | ab_mask;
 469
 470     /*
 471      * It is implementation-defined whether the cases of (0,inf,qnan)
 472      * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
 473      * they return if they do), so we have to hand this information
 474      * off to the target-specific pick-a-NaN routine.
 475      */
 476     if (unlikely(abc_mask & float_cmask_anynan)) {
 477         return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
 478     }
 479
 480     if (flags & float_muladd_negate_c) {
 481         c->sign ^= 1;
 482     }
 483
 484     /* Compute the sign of the product into A. */
 485     a->sign ^= b->sign;
 486     if (flags & float_muladd_negate_product) {
 487         a->sign ^= 1;
 488     }
 489
 490     if (unlikely(ab_mask != float_cmask_normal)) {
 491         if (unlikely(ab_mask == float_cmask_infzero)) {
 492             float_raise(float_flag_invalid | float_flag_invalid_imz, s);
 493             goto d_nan;
 494         }
 495
 496         if (ab_mask & float_cmask_inf) {
 497             if (c->cls == float_class_inf && a->sign != c->sign) {
 498                 float_raise(float_flag_invalid | float_flag_invalid_isi, s);
 499                 goto d_nan;
 500             }
 501             goto return_inf;
 502         }
 503
 504         g_assert(ab_mask & float_cmask_zero);
 505         if (c->cls == float_class_normal) {
 506             *a = *c;
 507             goto return_normal;
 508         }
 509         if (c->cls == float_class_zero) {
 510             if (a->sign != c->sign) {
 511                 goto return_sub_zero;
 512             }
 513             goto return_zero;
 514         }
 515         g_assert(c->cls == float_class_inf);
 516     }
 517
 518     if (unlikely(c->cls == float_class_inf)) {
 519         a->sign = c->sign;
 520         goto return_inf;
 521     }
 522
 523     /* Perform the multiplication step. */
 524     p_widen.sign = a->sign;
 525     p_widen.exp = a->exp + b->exp + 1;
 526     frac_mulw(&p_widen, a, b);
 527     if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
 528         frac_add(&p_widen, &p_widen, &p_widen);
 529         p_widen.exp -= 1;
 530     }
 531
 532     /* Perform the addition step. */
 533     if (c->cls != float_class_zero) {
 534         /* Zero-extend C to less significant bits. */
 535         frac_widen(&c_widen, c);
 536         c_widen.exp = c->exp;
 537
 538         if (a->sign == c->sign) {
 539             parts_add_normal(&p_widen, &c_widen);
 540         } else if (!parts_sub_normal(&p_widen, &c_widen)) {
 541             goto return_sub_zero;
 542         }
 543     }
 544
 545     /* Narrow with sticky bit, for proper rounding later. */
 546     frac_truncjam(a, &p_widen);
 547     a->sign = p_widen.sign;
 548     a->exp = p_widen.exp;
 549
 550  return_normal:
 551     if (flags & float_muladd_halve_result) {
 552         a->exp -= 1;
 553     }
 554  finish_sign:
 555     if (flags & float_muladd_negate_result) {
 556         a->sign ^= 1;
 557     }
 558     return a;
 559
 560  return_sub_zero:
 561     a->sign = s->float_rounding_mode == float_round_down;
 562  return_zero:
 563     a->cls = float_class_zero;
 564     goto finish_sign;
 565
 566  return_inf:
 567     a->cls = float_class_inf;
 568     goto finish_sign;
 569
 570  d_nan:
 571     parts_default_nan(a, s);
 572     return a;
 573 }
 574
 575 /*
 576  * Returns the result of dividing the floating-point value `a' by the
 577  * corresponding value `b'. The operation is performed according to
 578  * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 579  */
 580 static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
 581                                 float_status *s)
 582 {
 583     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 584     bool sign = a->sign ^ b->sign;
 585
 586     if (likely(ab_mask == float_cmask_normal)) {
 587         a->sign = sign;
 588         a->exp -= b->exp + frac_div(a, b);
 589         return a;
 590     }
 591
 592     /* 0/0 or Inf/Inf => NaN */
 593     if (unlikely(ab_mask == float_cmask_zero)) {
 594         float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
 595         goto d_nan;
 596     }
 597     if (unlikely(ab_mask == float_cmask_inf)) {
 598         float_raise(float_flag_invalid | float_flag_invalid_idi, s);
 599         goto d_nan;
 600     }
 601
 602     /* All the NaN cases */
 603     if (unlikely(ab_mask & float_cmask_anynan)) {
 604         return parts_pick_nan(a, b, s);
 605     }
 606
 607     a->sign = sign;
 608
 609     /* Inf / X */
 610     if (a->cls == float_class_inf) {
 611         return a;
 612     }
 613
 614     /* 0 / X */
 615     if (a->cls == float_class_zero) {
 616         return a;
 617     }
 618
 619     /* X / Inf */
 620     if (b->cls == float_class_inf) {
 621         a->cls = float_class_zero;
 622         return a;
 623     }
 624
 625     /* X / 0 => Inf */
 626     g_assert(b->cls == float_class_zero);
 627     float_raise(float_flag_divbyzero, s);
 628     a->cls = float_class_inf;
 629     return a;
 630
 631  d_nan:
 632     parts_default_nan(a, s);
 633     return a;
 634 }
 635
 636 /*
 637  * Floating point remainder, per IEC/IEEE, or modulus.
 638  */
 639 static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
 640                                    uint64_t *mod_quot, float_status *s)
 641 {
 642     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 643
 644     if (likely(ab_mask == float_cmask_normal)) {
 645         frac_modrem(a, b, mod_quot);
 646         return a;
 647     }
 648
 649     if (mod_quot) {
 650         *mod_quot = 0;
 651     }
 652
 653     /* All the NaN cases */
 654     if (unlikely(ab_mask & float_cmask_anynan)) {
 655         return parts_pick_nan(a, b, s);
 656     }
 657
 658     /* Inf % N; N % 0 */
 659     if (a->cls == float_class_inf || b->cls == float_class_zero) {
 660         float_raise(float_flag_invalid, s);
 661         parts_default_nan(a, s);
 662         return a;
 663     }
 664
 665     /* N % Inf; 0 % N */
 666     g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
 667     return a;
 668 }
 669
 670 /*
 671  * Square Root
 672  *
 673  * The base algorithm is lifted from
 674  * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 675  * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 676  * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 677  * and is thus MIT licenced.
 678  */
 679 static void partsN(sqrt)(FloatPartsN *a, float_status *status,
 680                          const FloatFmt *fmt)
 681 {
 682     const uint32_t three32 = 3u << 30;
 683     const uint64_t three64 = 3ull << 62;
 684     uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
 685     uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
 686     uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
 687     uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
 688     uint64_t discard;
 689     bool exp_odd;
 690     size_t index;
 691
 692     if (unlikely(a->cls != float_class_normal)) {
 693         switch (a->cls) {
 694         case float_class_snan:
 695         case float_class_qnan:
 696             parts_return_nan(a, status);
 697             return;
 698         case float_class_zero:
 699             return;
 700         case float_class_inf:
 701             if (unlikely(a->sign)) {
 702                 goto d_nan;
 703             }
 704             return;
 705         default:
 706             g_assert_not_reached();
 707         }
 708     }
 709
 710     if (unlikely(a->sign)) {
 711         goto d_nan;
 712     }
 713
 714     /*
 715      * Argument reduction.
 716      * x = 4^e frac; with integer e, and frac in [1, 4)
 717      * m = frac fixed point at bit 62, since we're in base 4.
 718      * If base-2 exponent is odd, exchange that for multiply by 2,
 719      * which results in no shift.
 720      */
 721     exp_odd = a->exp & 1;
 722     index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
 723     if (!exp_odd) {
 724         frac_shr(a, 1);
 725     }
 726
 727     /*
 728      * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
 729      *
 730      * Initial estimate:
 731      * 7-bit lookup table (1-bit exponent and 6-bit significand).
 732      *
 733      * The relative error (e = r0*sqrt(m)-1) of a linear estimate
 734      * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
 735      * a table lookup is faster and needs one less iteration.
 736      * The 7-bit table gives |e| < 0x1.fdp-9.
 737      *
 738      * A Newton-Raphson iteration for r is
 739      *   s = m*r
 740      *   d = s*r
 741      *   u = 3 - d
 742      *   r = r*u/2
 743      *
 744      * Fixed point representations:
 745      *   m, s, d, u, three are all 2.30; r is 0.32
 746      */
 747     m64 = a->frac_hi;
 748     m32 = m64 >> 32;
 749
 750     r32 = rsqrt_tab[index] << 16;
 751     /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
 752
 753     s32 = ((uint64_t)m32 * r32) >> 32;
 754     d32 = ((uint64_t)s32 * r32) >> 32;
 755     u32 = three32 - d32;
 756
 757     if (N == 64) {
 758         /* float64 or smaller */
 759
 760         r32 = ((uint64_t)r32 * u32) >> 31;
 761         /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
 762
 763         s32 = ((uint64_t)m32 * r32) >> 32;
 764         d32 = ((uint64_t)s32 * r32) >> 32;
 765         u32 = three32 - d32;
 766
 767         if (fmt->frac_size <= 23) {
 768             /* float32 or smaller */
 769
 770             s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
 771             s32 = (s32 - 1) >> 6;               /* 9.23 */
 772             /* s < sqrt(m) < s + 0x1.08p-23 */
 773
 774             /* compute nearest rounded result to 2.23 bits */
 775             uint32_t d0 = (m32 << 16) - s32 * s32;
 776             uint32_t d1 = s32 - d0;
 777             uint32_t d2 = d1 + s32 + 1;
 778             s32 += d1 >> 31;
 779             a->frac_hi = (uint64_t)s32 << (64 - 25);
 780
 781             /* increment or decrement for inexact */
 782             if (d2 != 0) {
 783                 a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
 784             }
 785             goto done;
 786         }
 787
 788         /* float64 */
 789
 790         r64 = (uint64_t)r32 * u32 * 2;
 791         /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
 792         mul64To128(m64, r64, &s64, &discard);
 793         mul64To128(s64, r64, &d64, &discard);
 794         u64 = three64 - d64;
 795
 796         mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
 797         s64 = (s64 - 2) >> 9;                  /* 12.52 */
 798
 799         /* Compute nearest rounded result */
 800         uint64_t d0 = (m64 << 42) - s64 * s64;
 801         uint64_t d1 = s64 - d0;
 802         uint64_t d2 = d1 + s64 + 1;
 803         s64 += d1 >> 63;
 804         a->frac_hi = s64 << (64 - 54);
 805
 806         /* increment or decrement for inexact */
 807         if (d2 != 0) {
 808             a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
 809         }
 810         goto done;
 811     }
 812
 813     r64 = (uint64_t)r32 * u32 * 2;
 814     /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
 815
 816     mul64To128(m64, r64, &s64, &discard);
 817     mul64To128(s64, r64, &d64, &discard);
 818     u64 = three64 - d64;
 819     mul64To128(u64, r64, &r64, &discard);
 820     r64 <<= 1;
 821     /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
 822
 823     mul64To128(m64, r64, &s64, &discard);
 824     mul64To128(s64, r64, &d64, &discard);
 825     u64 = three64 - d64;
 826     mul64To128(u64, r64, &rh, &rl);
 827     add128(rh, rl, rh, rl, &rh, &rl);
 828     /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
 829
 830     mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
 831     mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
 832     sub128(three64, 0, dh, dl, &uh, &ul);
 833     mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
 834     /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
 835
 836     sub128(sh, sl, 0, 4, &sh, &sl);
 837     shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
 838     /* s < sqrt(m) < s + 1ulp */
 839
 840     /* Compute nearest rounded result */
 841     mul64To128(sl, sl, &d0h, &d0l);
 842     d0h += 2 * sh * sl;
 843     sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
 844     sub128(sh, sl, d0h, d0l, &d1h, &d1l);
 845     add128(sh, sl, 0, 1, &d2h, &d2l);
 846     add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
 847     add128(sh, sl, 0, d1h >> 63, &sh, &sl);
 848     shift128Left(sh, sl, 128 - 114, &sh, &sl);
 849
 850     /* increment or decrement for inexact */
 851     if (d2h | d2l) {
 852         if ((int64_t)(d1h ^ d2h) < 0) {
 853             sub128(sh, sl, 0, 1, &sh, &sl);
 854         } else {
 855             add128(sh, sl, 0, 1, &sh, &sl);
 856         }
 857     }
 858     a->frac_lo = sl;
 859     a->frac_hi = sh;
 860
 861  done:
 862     /* Convert back from base 4 to base 2. */
 863     a->exp >>= 1;
 864     if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
 865         frac_add(a, a, a);
 866     } else {
 867         a->exp += 1;
 868     }
 869     return;
 870
 871  d_nan:
 872     float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
 873     parts_default_nan(a, status);
 874 }
 875
 876 /*
 877  * Rounds the floating-point value `a' to an integer, and returns the
 878  * result as a floating-point value. The operation is performed
 879  * according to the IEC/IEEE Standard for Binary Floating-Point
 880  * Arithmetic.
 881  *
 882  * parts_round_to_int_normal is an internal helper function for
 883  * normal numbers only, returning true for inexact but not directly
 884  * raising float_flag_inexact.
 885  */
 886 static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
 887                                         int scale, int frac_size)
 888 {
 889     uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
 890     int shift_adj;
 891
 892     scale = MIN(MAX(scale, -0x10000), 0x10000);
 893     a->exp += scale;
 894
 895     if (a->exp < 0) {
 896         bool one;
 897
 898         /* All fractional */
 899         switch (rmode) {
 900         case float_round_nearest_even:
 901             one = false;
 902             if (a->exp == -1) {
 903                 FloatPartsN tmp;
 904                 /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
 905                 frac_add(&tmp, a, a);
 906                 /* Anything remaining means frac > 0.5. */
 907                 one = !frac_eqz(&tmp);
 908             }
 909             break;
 910         case float_round_ties_away:
 911             one = a->exp == -1;
 912             break;
 913         case float_round_to_zero:
 914             one = false;
 915             break;
 916         case float_round_up:
 917             one = !a->sign;
 918             break;
 919         case float_round_down:
 920             one = a->sign;
 921             break;
 922         case float_round_to_odd:
 923             one = true;
 924             break;
 925         default:
 926             g_assert_not_reached();
 927         }
 928
 929         frac_clear(a);
 930         a->exp = 0;
 931         if (one) {
 932             a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
 933         } else {
 934             a->cls = float_class_zero;
 935         }
 936         return true;
 937     }
 938
 939     if (a->exp >= frac_size) {
 940         /* All integral */
 941         return false;
 942     }
 943
 944     if (N > 64 && a->exp < N - 64) {
 945         /*
 946          * Rounding is not in the low word -- shift lsb to bit 2,
 947          * which leaves room for sticky and rounding bit.
 948          */
 949         shift_adj = (N - 1) - (a->exp + 2);
 950         frac_shrjam(a, shift_adj);
 951         frac_lsb = 1 << 2;
 952     } else {
 953         shift_adj = 0;
 954         frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
 955     }
 956
 957     frac_lsbm1 = frac_lsb >> 1;
 958     rnd_mask = frac_lsb - 1;
 959     rnd_even_mask = rnd_mask | frac_lsb;
 960
 961     if (!(a->frac_lo & rnd_mask)) {
 962         /* Fractional bits already clear, undo the shift above. */
 963         frac_shl(a, shift_adj);
 964         return false;
 965     }
 966
 967     switch (rmode) {
 968     case float_round_nearest_even:
 969         inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
 970         break;
 971     case float_round_ties_away:
 972         inc = frac_lsbm1;
 973         break;
 974     case float_round_to_zero:
 975         inc = 0;
 976         break;
 977     case float_round_up:
 978         inc = a->sign ? 0 : rnd_mask;
 979         break;
 980     case float_round_down:
 981         inc = a->sign ? rnd_mask : 0;
 982         break;
 983     case float_round_to_odd:
 984         inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
 985         break;
 986     default:
 987         g_assert_not_reached();
 988     }
 989
 990     if (shift_adj == 0) {
 991         if (frac_addi(a, a, inc)) {
 992             frac_shr(a, 1);
 993             a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
 994             a->exp++;
 995         }
 996         a->frac_lo &= ~rnd_mask;
 997     } else {
 998         frac_addi(a, a, inc);
 999         a->frac_lo &= ~rnd_mask;
1000         /* Be careful shifting back, not to overflow */
1001         frac_shl(a, shift_adj - 1);
1002         if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
1003             a->exp++;
1004         } else {
1005             frac_add(a, a, a);
1006         }
1007     }
1008     return true;
1009 }
1010
1011 static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1012                                  int scale, float_status *s,
1013                                  const FloatFmt *fmt)
1014 {
1015     switch (a->cls) {
1016     case float_class_qnan:
1017     case float_class_snan:
1018         parts_return_nan(a, s);
1019         break;
1020     case float_class_zero:
1021     case float_class_inf:
1022         break;
1023     case float_class_normal:
1024         if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1025             float_raise(float_flag_inexact, s);
1026         }
1027         break;
1028     default:
1029         g_assert_not_reached();
1030     }
1031 }
1032
1033 /*
1034  * Returns the result of converting the floating-point value `a' to
1035  * the two's complement integer format. The conversion is performed
1036  * according to the IEC/IEEE Standard for Binary Floating-Point
1037  * Arithmetic---which means in particular that the conversion is
1038  * rounded according to the current rounding mode. If `a' is a NaN,
1039  * the largest positive integer is returned. Otherwise, if the
1040  * conversion overflows, the largest integer with the same sign as `a'
1041  * is returned.
1042  */
1043 static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1044                                      int scale, int64_t min, int64_t max,
1045                                      float_status *s)
1046 {
1047     int flags = 0;
1048     uint64_t r;
1049
1050     switch (p->cls) {
1051     case float_class_snan:
1052         flags |= float_flag_invalid_snan;
1053         /* fall through */
1054     case float_class_qnan:
1055         flags |= float_flag_invalid;
1056         r = max;
1057         break;
1058
1059     case float_class_inf:
1060         flags = float_flag_invalid | float_flag_invalid_cvti;
1061         r = p->sign ? min : max;
1062         break;
1063
1064     case float_class_zero:
1065         return 0;
1066
1067     case float_class_normal:
1068         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1069         if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1070             flags = float_flag_inexact;
1071         }
1072
1073         if (p->exp <= DECOMPOSED_BINARY_POINT) {
1074             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1075         } else {
1076             r = UINT64_MAX;
1077         }
1078         if (p->sign) {
1079             if (r <= -(uint64_t)min) {
1080                 r = -r;
1081             } else {
1082                 flags = float_flag_invalid | float_flag_invalid_cvti;
1083                 r = min;
1084             }
1085         } else if (r > max) {
1086             flags = float_flag_invalid | float_flag_invalid_cvti;
1087             r = max;
1088         }
1089         break;
1090
1091     default:
1092         g_assert_not_reached();
1093     }
1094
1095     float_raise(flags, s);
1096     return r;
1097 }
1098
1099 /*
1100  *  Returns the result of converting the floating-point value `a' to
1101  *  the unsigned integer format. The conversion is performed according
1102  *  to the IEC/IEEE Standard for Binary Floating-Point
1103  *  Arithmetic---which means in particular that the conversion is
1104  *  rounded according to the current rounding mode. If `a' is a NaN,
1105  *  the largest unsigned integer is returned. Otherwise, if the
1106  *  conversion overflows, the largest unsigned integer is returned. If
1107  *  the 'a' is negative, the result is rounded and zero is returned;
1108  *  values that do not round to zero will raise the inexact exception
1109  *  flag.
1110  */
1111 static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1112                                       int scale, uint64_t max, float_status *s)
1113 {
1114     int flags = 0;
1115     uint64_t r;
1116
1117     switch (p->cls) {
1118     case float_class_snan:
1119         flags |= float_flag_invalid_snan;
1120         /* fall through */
1121     case float_class_qnan:
1122         flags |= float_flag_invalid;
1123         r = max;
1124         break;
1125
1126     case float_class_inf:
1127         flags = float_flag_invalid | float_flag_invalid_cvti;
1128         r = p->sign ? 0 : max;
1129         break;
1130
1131     case float_class_zero:
1132         return 0;
1133
1134     case float_class_normal:
1135         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1136         if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1137             flags = float_flag_inexact;
1138             if (p->cls == float_class_zero) {
1139                 r = 0;
1140                 break;
1141             }
1142         }
1143
1144         if (p->sign) {
1145             flags = float_flag_invalid | float_flag_invalid_cvti;
1146             r = 0;
1147         } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1148             flags = float_flag_invalid | float_flag_invalid_cvti;
1149             r = max;
1150         } else {
1151             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1152             if (r > max) {
1153                 flags = float_flag_invalid | float_flag_invalid_cvti;
1154                 r = max;
1155             }
1156         }
1157         break;
1158
1159     default:
1160         g_assert_not_reached();
1161     }
1162
1163     float_raise(flags, s);
1164     return r;
1165 }
1166
1167 /*
1168  * Integer to float conversions
1169  *
1170  * Returns the result of converting the two's complement integer `a'
1171  * to the floating-point format. The conversion is performed according
1172  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1173  */
1174 static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1175                                   int scale, float_status *s)
1176 {
1177     uint64_t f = a;
1178     int shift;
1179
1180     memset(p, 0, sizeof(*p));
1181
1182     if (a == 0) {
1183         p->cls = float_class_zero;
1184         return;
1185     }
1186
1187     p->cls = float_class_normal;
1188     if (a < 0) {
1189         f = -f;
1190         p->sign = true;
1191     }
1192     shift = clz64(f);
1193     scale = MIN(MAX(scale, -0x10000), 0x10000);
1194
1195     p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1196     p->frac_hi = f << shift;
1197 }
1198
1199 /*
1200  * Unsigned Integer to float conversions
1201  *
1202  * Returns the result of converting the unsigned integer `a' to the
1203  * floating-point format. The conversion is performed according to the
1204  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1205  */
1206 static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1207                                   int scale, float_status *status)
1208 {
1209     memset(p, 0, sizeof(*p));
1210
1211     if (a == 0) {
1212         p->cls = float_class_zero;
1213     } else {
1214         int shift = clz64(a);
1215         scale = MIN(MAX(scale, -0x10000), 0x10000);
1216         p->cls = float_class_normal;
1217         p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1218         p->frac_hi = a << shift;
1219     }
1220 }
1221
1222 /*
1223  * Float min/max.
1224  */
1225 static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1226                                    float_status *s, int flags)
1227 {
1228     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1229     int a_exp, b_exp, cmp;
1230
1231     if (unlikely(ab_mask & float_cmask_anynan)) {
1232         /*
1233          * For minNum/maxNum (IEEE 754-2008)
1234          * or minimumNumber/maximumNumber (IEEE 754-2019),
1235          * if one operand is a QNaN, and the other
1236          * operand is numerical, then return numerical argument.
1237          */
1238         if ((flags & (minmax_isnum | minmax_isnumber))
1239             && !(ab_mask & float_cmask_snan)
1240             && (ab_mask & ~float_cmask_qnan)) {
1241             return is_nan(a->cls) ? b : a;
1242         }
1243
1244         /*
1245          * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1246          * are removed and replaced with minimum, minimumNumber, maximum
1247          * and maximumNumber.
1248          * minimumNumber/maximumNumber behavior for SNaN is changed to:
1249          *   If both operands are NaNs, a QNaN is returned.
1250          *   If either operand is a SNaN,
1251          *   an invalid operation exception is signaled,
1252          *   but unless both operands are NaNs,
1253          *   the SNaN is otherwise ignored and not converted to a QNaN.
1254          */
1255         if ((flags & minmax_isnumber)
1256             && (ab_mask & float_cmask_snan)
1257             && (ab_mask & ~float_cmask_anynan)) {
1258             float_raise(float_flag_invalid, s);
1259             return is_nan(a->cls) ? b : a;
1260         }
1261
1262         return parts_pick_nan(a, b, s);
1263     }
1264
1265     a_exp = a->exp;
1266     b_exp = b->exp;
1267
1268     if (unlikely(ab_mask != float_cmask_normal)) {
1269         switch (a->cls) {
1270         case float_class_normal:
1271             break;
1272         case float_class_inf:
1273             a_exp = INT16_MAX;
1274             break;
1275         case float_class_zero:
1276             a_exp = INT16_MIN;
1277             break;
1278         default:
1279             g_assert_not_reached();
1280             break;
1281         }
1282         switch (b->cls) {
1283         case float_class_normal:
1284             break;
1285         case float_class_inf:
1286             b_exp = INT16_MAX;
1287             break;
1288         case float_class_zero:
1289             b_exp = INT16_MIN;
1290             break;
1291         default:
1292             g_assert_not_reached();
1293             break;
1294         }
1295     }
1296
1297     /* Compare magnitudes. */
1298     cmp = a_exp - b_exp;
1299     if (cmp == 0) {
1300         cmp = frac_cmp(a, b);
1301     }
1302
1303     /*
1304      * Take the sign into account.
1305      * For ismag, only do this if the magnitudes are equal.
1306      */
1307     if (!(flags & minmax_ismag) || cmp == 0) {
1308         if (a->sign != b->sign) {
1309             /* For differing signs, the negative operand is less. */
1310             cmp = a->sign ? -1 : 1;
1311         } else if (a->sign) {
1312             /* For two negative operands, invert the magnitude comparison. */
1313             cmp = -cmp;
1314         }
1315     }
1316
1317     if (flags & minmax_ismin) {
1318         cmp = -cmp;
1319     }
1320     return cmp < 0 ? b : a;
1321 }
1322
1323 /*
1324  * Floating point compare
1325  */
1326 static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1327                                      float_status *s, bool is_quiet)
1328 {
1329     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1330
1331     if (likely(ab_mask == float_cmask_normal)) {
1332         FloatRelation cmp;
1333
1334         if (a->sign != b->sign) {
1335             goto a_sign;
1336         }
1337         if (a->exp == b->exp) {
1338             cmp = frac_cmp(a, b);
1339         } else if (a->exp < b->exp) {
1340             cmp = float_relation_less;
1341         } else {
1342             cmp = float_relation_greater;
1343         }
1344         if (a->sign) {
1345             cmp = -cmp;
1346         }
1347         return cmp;
1348     }
1349
1350     if (unlikely(ab_mask & float_cmask_anynan)) {
1351         if (ab_mask & float_cmask_snan) {
1352             float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1353         } else if (!is_quiet) {
1354             float_raise(float_flag_invalid, s);
1355         }
1356         return float_relation_unordered;
1357     }
1358
1359     if (ab_mask & float_cmask_zero) {
1360         if (ab_mask == float_cmask_zero) {
1361             return float_relation_equal;
1362         } else if (a->cls == float_class_zero) {
1363             goto b_sign;
1364         } else {
1365             goto a_sign;
1366         }
1367     }
1368
1369     if (ab_mask == float_cmask_inf) {
1370         if (a->sign == b->sign) {
1371             return float_relation_equal;
1372         }
1373     } else if (b->cls == float_class_inf) {
1374         goto b_sign;
1375     } else {
1376         g_assert(a->cls == float_class_inf);
1377     }
1378
1379  a_sign:
1380     return a->sign ? float_relation_less : float_relation_greater;
1381  b_sign:
1382     return b->sign ? float_relation_greater : float_relation_less;
1383 }
1384
1385 /*
1386  * Multiply A by 2 raised to the power N.
1387  */
1388 static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1389 {
1390     switch (a->cls) {
1391     case float_class_snan:
1392     case float_class_qnan:
1393         parts_return_nan(a, s);
1394         break;
1395     case float_class_zero:
1396     case float_class_inf:
1397         break;
1398     case float_class_normal:
1399         a->exp += MIN(MAX(n, -0x10000), 0x10000);
1400         break;
1401     default:
1402         g_assert_not_reached();
1403     }
1404 }
1405
1406 /*
1407  * Return log2(A)
1408  */
1409 static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
1410 {
1411     uint64_t a0, a1, r, t, ign;
1412     FloatPartsN f;
1413     int i, n, a_exp, f_exp;
1414
1415     if (unlikely(a->cls != float_class_normal)) {
1416         switch (a->cls) {
1417         case float_class_snan:
1418         case float_class_qnan:
1419             parts_return_nan(a, s);
1420             return;
1421         case float_class_zero:
1422             /* log2(0) = -inf */
1423             a->cls = float_class_inf;
1424             a->sign = 1;
1425             return;
1426         case float_class_inf:
1427             if (unlikely(a->sign)) {
1428                 goto d_nan;
1429             }
1430             return;
1431         default:
1432             break;
1433         }
1434         g_assert_not_reached();
1435     }
1436     if (unlikely(a->sign)) {
1437         goto d_nan;
1438     }
1439
1440     /* TODO: This algorithm looses bits too quickly for float128. */
1441     g_assert(N == 64);
1442
1443     a_exp = a->exp;
1444     f_exp = -1;
1445
1446     r = 0;
1447     t = DECOMPOSED_IMPLICIT_BIT;
1448     a0 = a->frac_hi;
1449     a1 = 0;
1450
1451     n = fmt->frac_size + 2;
1452     if (unlikely(a_exp == -1)) {
1453         /*
1454          * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
1455          * When the value is very close to 1.0, there are lots of 1's in
1456          * the msb parts of the fraction.  At the end, when we subtract
1457          * this value from -1.0, we can see a catastrophic loss of precision,
1458          * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
1459          * bits of y in the final result.  To minimize this, compute as many
1460          * digits as we can.
1461          * ??? This case needs another algorithm to avoid this.
1462          */
1463         n = fmt->frac_size * 2 + 2;
1464         /* Don't compute a value overlapping the sticky bit */
1465         n = MIN(n, 62);
1466     }
1467
1468     for (i = 0; i < n; i++) {
1469         if (a1) {
1470             mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
1471         } else if (a0 & 0xffffffffull) {
1472             mul64To128(a0, a0, &a0, &a1);
1473         } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
1474             a0 >>= 32;
1475             a0 *= a0;
1476         } else {
1477             goto exact;
1478         }
1479
1480         if (a0 & DECOMPOSED_IMPLICIT_BIT) {
1481             if (unlikely(a_exp == 0 && r == 0)) {
1482                 /*
1483                  * When a_exp == 0, we're computing the log2 of a value
1484                  * [1.0,2.0).  When the value is very close to 1.0, there
1485                  * are lots of 0's in the msb parts of the fraction.
1486                  * We need to compute more digits to produce a correct
1487                  * result -- restart at the top of the fraction.
1488                  * ??? This is likely to lose precision quickly, as for
1489                  * float128; we may need another method.
1490                  */
1491                 f_exp -= i;
1492                 t = r = DECOMPOSED_IMPLICIT_BIT;
1493                 i = 0;
1494             } else {
1495                 r |= t;
1496             }
1497         } else {
1498             add128(a0, a1, a0, a1, &a0, &a1);
1499         }
1500         t >>= 1;
1501     }
1502
1503     /* Set sticky for inexact. */
1504     r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
1505
1506  exact:
1507     parts_sint_to_float(a, a_exp, 0, s);
1508     if (r == 0) {
1509         return;
1510     }
1511
1512     memset(&f, 0, sizeof(f));
1513     f.cls = float_class_normal;
1514     f.frac_hi = r;
1515     f.exp = f_exp - frac_normalize(&f);
1516
1517     if (a_exp < 0) {
1518         parts_sub_normal(a, &f);
1519     } else if (a_exp > 0) {
1520         parts_add_normal(a, &f);
1521     } else {
1522         *a = f;
1523     }
1524     return;
1525
1526  d_nan:
1527     float_raise(float_flag_invalid, s);
1528     parts_default_nan(a, s);
1529 }