fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*
 405  * Classify a floating point number. Everything above float_class_qnan
 406  * is a NaN so cls >= float_class_qnan is any NaN.
 407  */
 408
 409 typedef enum __attribute__ ((__packed__)) {
 410     float_class_unclassified,
 411     float_class_zero,
 412     float_class_normal,
 413     float_class_inf,
 414     float_class_qnan,  /* all NaNs from here */
 415     float_class_snan,
 416 } FloatClass;
 417
 418 #define float_cmask(bit)  (1u << (bit))
 419
 420 enum {
 421     float_cmask_zero    = float_cmask(float_class_zero),
 422     float_cmask_normal  = float_cmask(float_class_normal),
 423     float_cmask_inf     = float_cmask(float_class_inf),
 424     float_cmask_qnan    = float_cmask(float_class_qnan),
 425     float_cmask_snan    = float_cmask(float_class_snan),
 426
 427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 429 };
 430
 431 /* Flags for parts_minmax. */
 432 enum {
 433     /* Set for minimum; clear for maximum. */
 434     minmax_ismin = 1,
 435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
 436     minmax_isnum = 2,
 437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
 438     minmax_ismag = 4,
 439     /*
 440      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
 441      * operations.
 442      */
 443     minmax_isnumber = 8,
 444 };
 445
 446 /* Simple helpers for checking if, or what kind of, NaN we have */
 447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 448 {
 449     return unlikely(c >= float_class_qnan);
 450 }
 451
 452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 453 {
 454     return c == float_class_snan;
 455 }
 456
 457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 458 {
 459     return c == float_class_qnan;
 460 }
 461
 462 /*
 463  * Structure holding all of the decomposed parts of a float.
 464  * The exponent is unbiased and the fraction is normalized.
 465  *
 466  * The fraction words are stored in big-endian word ordering,
 467  * so that truncation from a larger format to a smaller format
 468  * can be done simply by ignoring subsequent elements.
 469  */
 470
 471 typedef struct {
 472     FloatClass cls;
 473     bool sign;
 474     int32_t exp;
 475     union {
 476         /* Routines that know the structure may reference the singular name. */
 477         uint64_t frac;
 478         /*
 479          * Routines expanded with multiple structures reference "hi" and "lo"
 480          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 481          * both the same word and aliased here.
 482          */
 483         uint64_t frac_hi;
 484         uint64_t frac_lo;
 485     };
 486 } FloatParts64;
 487
 488 typedef struct {
 489     FloatClass cls;
 490     bool sign;
 491     int32_t exp;
 492     uint64_t frac_hi;
 493     uint64_t frac_lo;
 494 } FloatParts128;
 495
 496 typedef struct {
 497     FloatClass cls;
 498     bool sign;
 499     int32_t exp;
 500     uint64_t frac_hi;
 501     uint64_t frac_hm;  /* high-middle */
 502     uint64_t frac_lm;  /* low-middle */
 503     uint64_t frac_lo;
 504 } FloatParts256;
 505
 506 /* These apply to the most significant word of each FloatPartsN. */
 507 #define DECOMPOSED_BINARY_POINT    63
 508 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 509
 510 /* Structure holding all of the relevant parameters for a format.
 511  *   exp_size: the size of the exponent field
 512  *   exp_bias: the offset applied to the exponent field
 513  *   exp_max: the maximum normalised exponent
 514  *   frac_size: the size of the fraction field
 515  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 516  * The following are computed based the size of fraction
 517  *   round_mask: bits below lsb which must be rounded
 518  * The following optional modifiers are available:
 519  *   arm_althp: handle ARM Alternative Half Precision
 520  */
 521 typedef struct {
 522     int exp_size;
 523     int exp_bias;
 524     int exp_max;
 525     int frac_size;
 526     int frac_shift;
 527     bool arm_althp;
 528     uint64_t round_mask;
 529 } FloatFmt;
 530
 531 /* Expand fields based on the size of exponent and fraction */
 532 #define FLOAT_PARAMS_(E)                                \
 533     .exp_size       = E,                                \
 534     .exp_bias       = ((1 << E) - 1) >> 1,              \
 535     .exp_max        = (1 << E) - 1
 536
 537 #define FLOAT_PARAMS(E, F)                              \
 538     FLOAT_PARAMS_(E),                                   \
 539     .frac_size      = F,                                \
 540     .frac_shift     = (-F - 1) & 63,                    \
 541     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
 542
 543 static const FloatFmt float16_params = {
 544     FLOAT_PARAMS(5, 10)
 545 };
 546
 547 static const FloatFmt float16_params_ahp = {
 548     FLOAT_PARAMS(5, 10),
 549     .arm_althp = true
 550 };
 551
 552 static const FloatFmt bfloat16_params = {
 553     FLOAT_PARAMS(8, 7)
 554 };
 555
 556 static const FloatFmt float32_params = {
 557     FLOAT_PARAMS(8, 23)
 558 };
 559
 560 static const FloatFmt float64_params = {
 561     FLOAT_PARAMS(11, 52)
 562 };
 563
 564 static const FloatFmt float128_params = {
 565     FLOAT_PARAMS(15, 112)
 566 };
 567
 568 #define FLOATX80_PARAMS(R)              \
 569     FLOAT_PARAMS_(15),                  \
 570     .frac_size = R == 64 ? 63 : R,      \
 571     .frac_shift = 0,                    \
 572     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 573
 574 static const FloatFmt floatx80_params[3] = {
 575     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
 576     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
 577     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
 578 };
 579
 580 /* Unpack a float to parts, but do not canonicalize.  */
 581 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 582 {
 583     const int f_size = fmt->frac_size;
 584     const int e_size = fmt->exp_size;
 585
 586     *r = (FloatParts64) {
 587         .cls = float_class_unclassified,
 588         .sign = extract64(raw, f_size + e_size, 1),
 589         .exp = extract64(raw, f_size, e_size),
 590         .frac = extract64(raw, 0, f_size)
 591     };
 592 }
 593
 594 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 595 {
 596     unpack_raw64(p, &float16_params, f);
 597 }
 598
 599 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 600 {
 601     unpack_raw64(p, &bfloat16_params, f);
 602 }
 603
 604 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 605 {
 606     unpack_raw64(p, &float32_params, f);
 607 }
 608
 609 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 610 {
 611     unpack_raw64(p, &float64_params, f);
 612 }
 613
 614 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 615 {
 616     *p = (FloatParts128) {
 617         .cls = float_class_unclassified,
 618         .sign = extract32(f.high, 15, 1),
 619         .exp = extract32(f.high, 0, 15),
 620         .frac_hi = f.low
 621     };
 622 }
 623
 624 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 625 {
 626     const int f_size = float128_params.frac_size - 64;
 627     const int e_size = float128_params.exp_size;
 628
 629     *p = (FloatParts128) {
 630         .cls = float_class_unclassified,
 631         .sign = extract64(f.high, f_size + e_size, 1),
 632         .exp = extract64(f.high, f_size, e_size),
 633         .frac_hi = extract64(f.high, 0, f_size),
 634         .frac_lo = f.low,
 635     };
 636 }
 637
 638 /* Pack a float from parts, but do not canonicalize.  */
 639 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 640 {
 641     const int f_size = fmt->frac_size;
 642     const int e_size = fmt->exp_size;
 643     uint64_t ret;
 644
 645     ret = (uint64_t)p->sign << (f_size + e_size);
 646     ret = deposit64(ret, f_size, e_size, p->exp);
 647     ret = deposit64(ret, 0, f_size, p->frac);
 648     return ret;
 649 }
 650
 651 static inline float16 float16_pack_raw(const FloatParts64 *p)
 652 {
 653     return make_float16(pack_raw64(p, &float16_params));
 654 }
 655
 656 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 657 {
 658     return pack_raw64(p, &bfloat16_params);
 659 }
 660
 661 static inline float32 float32_pack_raw(const FloatParts64 *p)
 662 {
 663     return make_float32(pack_raw64(p, &float32_params));
 664 }
 665
 666 static inline float64 float64_pack_raw(const FloatParts64 *p)
 667 {
 668     return make_float64(pack_raw64(p, &float64_params));
 669 }
 670
 671 static float128 float128_pack_raw(const FloatParts128 *p)
 672 {
 673     const int f_size = float128_params.frac_size - 64;
 674     const int e_size = float128_params.exp_size;
 675     uint64_t hi;
 676
 677     hi = (uint64_t)p->sign << (f_size + e_size);
 678     hi = deposit64(hi, f_size, e_size, p->exp);
 679     hi = deposit64(hi, 0, f_size, p->frac_hi);
 680     return make_float128(hi, p->frac_lo);
 681 }
 682
 683 /*----------------------------------------------------------------------------
 684 | Functions and definitions to determine:  (1) whether tininess for underflow
 685 | is detected before or after rounding by default, (2) what (if anything)
 686 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 687 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 688 | are propagated from function inputs to output.  These details are target-
 689 | specific.
 690 *----------------------------------------------------------------------------*/
 691 #include "softfloat-specialize.c.inc"
 692
 693 #define PARTS_GENERIC_64_128(NAME, P) \
 694     _Generic((P), FloatParts64 *: parts64_##NAME, \
 695                   FloatParts128 *: parts128_##NAME)
 696
 697 #define PARTS_GENERIC_64_128_256(NAME, P) \
 698     _Generic((P), FloatParts64 *: parts64_##NAME, \
 699                   FloatParts128 *: parts128_##NAME, \
 700                   FloatParts256 *: parts256_##NAME)
 701
 702 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 703 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 704
 705 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 706 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 707
 708 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 709
 710 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 711                                       float_status *s);
 712 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 713                                         float_status *s);
 714
 715 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 716
 717 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 718                                              FloatParts64 *c, float_status *s,
 719                                              int ab_mask, int abc_mask);
 720 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 721                                                FloatParts128 *b,
 722                                                FloatParts128 *c,
 723                                                float_status *s,
 724                                                int ab_mask, int abc_mask);
 725
 726 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 727     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 728
 729 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 730                                  const FloatFmt *fmt);
 731 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 732                                   const FloatFmt *fmt);
 733
 734 #define parts_canonicalize(A, S, F) \
 735     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 736
 737 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
 738                                    const FloatFmt *fmt);
 739 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
 740                                     const FloatFmt *fmt);
 741
 742 #define parts_uncanon_normal(A, S, F) \
 743     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
 744
 745 static void parts64_uncanon(FloatParts64 *p, float_status *status,
 746                             const FloatFmt *fmt);
 747 static void parts128_uncanon(FloatParts128 *p, float_status *status,
 748                              const FloatFmt *fmt);
 749
 750 #define parts_uncanon(A, S, F) \
 751     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 752
 753 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 754 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 755 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 756
 757 #define parts_add_normal(A, B) \
 758     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 759
 760 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 761 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 762 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 763
 764 #define parts_sub_normal(A, B) \
 765     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 766
 767 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 768                                     float_status *s, bool subtract);
 769 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 770                                       float_status *s, bool subtract);
 771
 772 #define parts_addsub(A, B, S, Z) \
 773     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 774
 775 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 776                                  float_status *s);
 777 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 778                                    float_status *s);
 779
 780 #define parts_mul(A, B, S) \
 781     PARTS_GENERIC_64_128(mul, A)(A, B, S)
 782
 783 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 784                                     FloatParts64 *c, int flags,
 785                                     float_status *s);
 786 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 787                                       FloatParts128 *c, int flags,
 788                                       float_status *s);
 789
 790 #define parts_muladd(A, B, C, Z, S) \
 791     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 792
 793 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 794                                  float_status *s);
 795 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 796                                    float_status *s);
 797
 798 #define parts_div(A, B, S) \
 799     PARTS_GENERIC_64_128(div, A)(A, B, S)
 800
 801 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
 802                                     uint64_t *mod_quot, float_status *s);
 803 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
 804                                       uint64_t *mod_quot, float_status *s);
 805
 806 #define parts_modrem(A, B, Q, S) \
 807     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
 808
 809 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
 810 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
 811
 812 #define parts_sqrt(A, S, F) \
 813     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 814
 815 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 816                                         int scale, int frac_size);
 817 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 818                                          int scale, int frac_size);
 819
 820 #define parts_round_to_int_normal(A, R, C, F) \
 821     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 822
 823 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 824                                  int scale, float_status *s,
 825                                  const FloatFmt *fmt);
 826 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 827                                   int scale, float_status *s,
 828                                   const FloatFmt *fmt);
 829
 830 #define parts_round_to_int(A, R, C, S, F) \
 831     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 832
 833 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 834                                      int scale, int64_t min, int64_t max,
 835                                      float_status *s);
 836 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 837                                      int scale, int64_t min, int64_t max,
 838                                      float_status *s);
 839
 840 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 841     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 842
 843 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
 844                                       int scale, uint64_t max,
 845                                       float_status *s);
 846 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
 847                                        int scale, uint64_t max,
 848                                        float_status *s);
 849
 850 #define parts_float_to_uint(P, R, Z, M, S) \
 851     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 852
 853 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 854                                   int scale, float_status *s);
 855 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 856                                    int scale, float_status *s);
 857
 858 #define parts_sint_to_float(P, I, Z, S) \
 859     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 860
 861 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
 862                                   int scale, float_status *s);
 863 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
 864                                    int scale, float_status *s);
 865
 866 #define parts_uint_to_float(P, I, Z, S) \
 867     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 868
 869 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
 870                                     float_status *s, int flags);
 871 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
 872                                       float_status *s, int flags);
 873
 874 #define parts_minmax(A, B, S, F) \
 875     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 876
 877 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
 878                                      float_status *s, bool q);
 879 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
 880                                       float_status *s, bool q);
 881
 882 #define parts_compare(A, B, S, Q) \
 883     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 884
 885 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
 886 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
 887
 888 #define parts_scalbn(A, N, S) \
 889     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 890
 891 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
 892 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
 893
 894 #define parts_log2(A, S, F) \
 895     PARTS_GENERIC_64_128(log2, A)(A, S, F)
 896
 897 /*
 898  * Helper functions for softfloat-parts.c.inc, per-size operations.
 899  */
 900
 901 #define FRAC_GENERIC_64_128(NAME, P) \
 902     _Generic((P), FloatParts64 *: frac64_##NAME, \
 903                   FloatParts128 *: frac128_##NAME)
 904
 905 #define FRAC_GENERIC_64_128_256(NAME, P) \
 906     _Generic((P), FloatParts64 *: frac64_##NAME, \
 907                   FloatParts128 *: frac128_##NAME, \
 908                   FloatParts256 *: frac256_##NAME)
 909
 910 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 911 {
 912     return uadd64_overflow(a->frac, b->frac, &r->frac);
 913 }
 914
 915 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 916 {
 917     bool c = 0;
 918     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 919     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 920     return c;
 921 }
 922
 923 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 924 {
 925     bool c = 0;
 926     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 927     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 928     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 929     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 930     return c;
 931 }
 932
 933 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 934
 935 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 936 {
 937     return uadd64_overflow(a->frac, c, &r->frac);
 938 }
 939
 940 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 941 {
 942     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 943     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 944 }
 945
 946 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 947
 948 static void frac64_allones(FloatParts64 *a)
 949 {
 950     a->frac = -1;
 951 }
 952
 953 static void frac128_allones(FloatParts128 *a)
 954 {
 955     a->frac_hi = a->frac_lo = -1;
 956 }
 957
 958 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 959
 960 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 961 {
 962     return (a->frac == b->frac ? float_relation_equal
 963             : a->frac < b->frac ? float_relation_less
 964             : float_relation_greater);
 965 }
 966
 967 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 968 {
 969     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 970     if (ta == tb) {
 971         ta = a->frac_lo, tb = b->frac_lo;
 972         if (ta == tb) {
 973             return float_relation_equal;
 974         }
 975     }
 976     return ta < tb ? float_relation_less : float_relation_greater;
 977 }
 978
 979 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 980
 981 static void frac64_clear(FloatParts64 *a)
 982 {
 983     a->frac = 0;
 984 }
 985
 986 static void frac128_clear(FloatParts128 *a)
 987 {
 988     a->frac_hi = a->frac_lo = 0;
 989 }
 990
 991 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 992
 993 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 994 {
 995     uint64_t n1, n0, r, q;
 996     bool ret;
 997
 998     /*
 999      * We want a 2*N / N-bit division to produce exactly an N-bit
1000      * result, so that we do not lose any precision and so that we
1001      * do not have to renormalize afterward.  If A.frac < B.frac,
1002      * then division would produce an (N-1)-bit result; shift A left
1003      * by one to produce the an N-bit result, and return true to
1004      * decrement the exponent to match.
1005      *
1006      * The udiv_qrnnd algorithm that we're using requires normalization,
1007      * i.e. the msb of the denominator must be set, which is already true.
1008      */
1009     ret = a->frac < b->frac;
1010     if (ret) {
1011         n0 = a->frac;
1012         n1 = 0;
1013     } else {
1014         n0 = a->frac >> 1;
1015         n1 = a->frac << 63;
1016     }
1017     q = udiv_qrnnd(&r, n0, n1, b->frac);
1018
1019     /* Set lsb if there is a remainder, to set inexact. */
1020     a->frac = q | (r != 0);
1021
1022     return ret;
1023 }
1024
1025 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1026 {
1027     uint64_t q0, q1, a0, a1, b0, b1;
1028     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1029     bool ret = false;
1030
1031     a0 = a->frac_hi, a1 = a->frac_lo;
1032     b0 = b->frac_hi, b1 = b->frac_lo;
1033
1034     ret = lt128(a0, a1, b0, b1);
1035     if (!ret) {
1036         a1 = shr_double(a0, a1, 1);
1037         a0 = a0 >> 1;
1038     }
1039
1040     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1041     q0 = estimateDiv128To64(a0, a1, b0);
1042
1043     /*
1044      * Estimate is high because B1 was not included (unless B1 == 0).
1045      * Reduce quotient and increase remainder until remainder is non-negative.
1046      * This loop will execute 0 to 2 times.
1047      */
1048     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1049     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1050     while (r0 != 0) {
1051         q0--;
1052         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1053     }
1054
1055     /* Repeat using the remainder, producing a second word of quotient. */
1056     q1 = estimateDiv128To64(r1, r2, b0);
1057     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1058     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1059     while (r1 != 0) {
1060         q1--;
1061         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1062     }
1063
1064     /* Any remainder indicates inexact; set sticky bit. */
1065     q1 |= (r2 | r3) != 0;
1066
1067     a->frac_hi = q0;
1068     a->frac_lo = q1;
1069     return ret;
1070 }
1071
1072 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1073
1074 static bool frac64_eqz(FloatParts64 *a)
1075 {
1076     return a->frac == 0;
1077 }
1078
1079 static bool frac128_eqz(FloatParts128 *a)
1080 {
1081     return (a->frac_hi | a->frac_lo) == 0;
1082 }
1083
1084 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1085
1086 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1087 {
1088     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1089 }
1090
1091 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1092 {
1093     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1094                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1095 }
1096
1097 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1098
1099 static void frac64_neg(FloatParts64 *a)
1100 {
1101     a->frac = -a->frac;
1102 }
1103
1104 static void frac128_neg(FloatParts128 *a)
1105 {
1106     bool c = 0;
1107     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1108     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1109 }
1110
1111 static void frac256_neg(FloatParts256 *a)
1112 {
1113     bool c = 0;
1114     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1115     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1116     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1117     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1118 }
1119
1120 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1121
1122 static int frac64_normalize(FloatParts64 *a)
1123 {
1124     if (a->frac) {
1125         int shift = clz64(a->frac);
1126         a->frac <<= shift;
1127         return shift;
1128     }
1129     return 64;
1130 }
1131
1132 static int frac128_normalize(FloatParts128 *a)
1133 {
1134     if (a->frac_hi) {
1135         int shl = clz64(a->frac_hi);
1136         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1137         a->frac_lo <<= shl;
1138         return shl;
1139     } else if (a->frac_lo) {
1140         int shl = clz64(a->frac_lo);
1141         a->frac_hi = a->frac_lo << shl;
1142         a->frac_lo = 0;
1143         return shl + 64;
1144     }
1145     return 128;
1146 }
1147
1148 static int frac256_normalize(FloatParts256 *a)
1149 {
1150     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1151     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1152     int ret, shl;
1153
1154     if (likely(a0)) {
1155         shl = clz64(a0);
1156         if (shl == 0) {
1157             return 0;
1158         }
1159         ret = shl;
1160     } else {
1161         if (a1) {
1162             ret = 64;
1163             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1164         } else if (a2) {
1165             ret = 128;
1166             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1167         } else if (a3) {
1168             ret = 192;
1169             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1170         } else {
1171             ret = 256;
1172             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1173             goto done;
1174         }
1175         shl = clz64(a0);
1176         if (shl == 0) {
1177             goto done;
1178         }
1179         ret += shl;
1180     }
1181
1182     a0 = shl_double(a0, a1, shl);
1183     a1 = shl_double(a1, a2, shl);
1184     a2 = shl_double(a2, a3, shl);
1185     a3 <<= shl;
1186
1187  done:
1188     a->frac_hi = a0;
1189     a->frac_hm = a1;
1190     a->frac_lm = a2;
1191     a->frac_lo = a3;
1192     return ret;
1193 }
1194
1195 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1196
1197 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1198 {
1199     uint64_t a0, a1, b0, t0, t1, q, quot;
1200     int exp_diff = a->exp - b->exp;
1201     int shift;
1202
1203     a0 = a->frac;
1204     a1 = 0;
1205
1206     if (exp_diff < -1) {
1207         if (mod_quot) {
1208             *mod_quot = 0;
1209         }
1210         return;
1211     }
1212     if (exp_diff == -1) {
1213         a0 >>= 1;
1214         exp_diff = 0;
1215     }
1216
1217     b0 = b->frac;
1218     quot = q = b0 <= a0;
1219     if (q) {
1220         a0 -= b0;
1221     }
1222
1223     exp_diff -= 64;
1224     while (exp_diff > 0) {
1225         q = estimateDiv128To64(a0, a1, b0);
1226         q = q > 2 ? q - 2 : 0;
1227         mul64To128(b0, q, &t0, &t1);
1228         sub128(a0, a1, t0, t1, &a0, &a1);
1229         shortShift128Left(a0, a1, 62, &a0, &a1);
1230         exp_diff -= 62;
1231         quot = (quot << 62) + q;
1232     }
1233
1234     exp_diff += 64;
1235     if (exp_diff > 0) {
1236         q = estimateDiv128To64(a0, a1, b0);
1237         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1238         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1239         sub128(a0, a1, t0, t1, &a0, &a1);
1240         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1241         while (le128(t0, t1, a0, a1)) {
1242             ++q;
1243             sub128(a0, a1, t0, t1, &a0, &a1);
1244         }
1245         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1246     } else {
1247         t0 = b0;
1248         t1 = 0;
1249     }
1250
1251     if (mod_quot) {
1252         *mod_quot = quot;
1253     } else {
1254         sub128(t0, t1, a0, a1, &t0, &t1);
1255         if (lt128(t0, t1, a0, a1) ||
1256             (eq128(t0, t1, a0, a1) && (q & 1))) {
1257             a0 = t0;
1258             a1 = t1;
1259             a->sign = !a->sign;
1260         }
1261     }
1262
1263     if (likely(a0)) {
1264         shift = clz64(a0);
1265         shortShift128Left(a0, a1, shift, &a0, &a1);
1266     } else if (likely(a1)) {
1267         shift = clz64(a1);
1268         a0 = a1 << shift;
1269         a1 = 0;
1270         shift += 64;
1271     } else {
1272         a->cls = float_class_zero;
1273         return;
1274     }
1275
1276     a->exp = b->exp + exp_diff - shift;
1277     a->frac = a0 | (a1 != 0);
1278 }
1279
1280 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1281                            uint64_t *mod_quot)
1282 {
1283     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1284     int exp_diff = a->exp - b->exp;
1285     int shift;
1286
1287     a0 = a->frac_hi;
1288     a1 = a->frac_lo;
1289     a2 = 0;
1290
1291     if (exp_diff < -1) {
1292         if (mod_quot) {
1293             *mod_quot = 0;
1294         }
1295         return;
1296     }
1297     if (exp_diff == -1) {
1298         shift128Right(a0, a1, 1, &a0, &a1);
1299         exp_diff = 0;
1300     }
1301
1302     b0 = b->frac_hi;
1303     b1 = b->frac_lo;
1304
1305     quot = q = le128(b0, b1, a0, a1);
1306     if (q) {
1307         sub128(a0, a1, b0, b1, &a0, &a1);
1308     }
1309
1310     exp_diff -= 64;
1311     while (exp_diff > 0) {
1312         q = estimateDiv128To64(a0, a1, b0);
1313         q = q > 4 ? q - 4 : 0;
1314         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1315         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1316         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1317         exp_diff -= 61;
1318         quot = (quot << 61) + q;
1319     }
1320
1321     exp_diff += 64;
1322     if (exp_diff > 0) {
1323         q = estimateDiv128To64(a0, a1, b0);
1324         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1325         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1326         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1327         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1328         while (le192(t0, t1, t2, a0, a1, a2)) {
1329             ++q;
1330             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1331         }
1332         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1333     } else {
1334         t0 = b0;
1335         t1 = b1;
1336         t2 = 0;
1337     }
1338
1339     if (mod_quot) {
1340         *mod_quot = quot;
1341     } else {
1342         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1343         if (lt192(t0, t1, t2, a0, a1, a2) ||
1344             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1345             a0 = t0;
1346             a1 = t1;
1347             a2 = t2;
1348             a->sign = !a->sign;
1349         }
1350     }
1351
1352     if (likely(a0)) {
1353         shift = clz64(a0);
1354         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1355     } else if (likely(a1)) {
1356         shift = clz64(a1);
1357         shortShift128Left(a1, a2, shift, &a0, &a1);
1358         a2 = 0;
1359         shift += 64;
1360     } else if (likely(a2)) {
1361         shift = clz64(a2);
1362         a0 = a2 << shift;
1363         a1 = a2 = 0;
1364         shift += 128;
1365     } else {
1366         a->cls = float_class_zero;
1367         return;
1368     }
1369
1370     a->exp = b->exp + exp_diff - shift;
1371     a->frac_hi = a0;
1372     a->frac_lo = a1 | (a2 != 0);
1373 }
1374
1375 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1376
1377 static void frac64_shl(FloatParts64 *a, int c)
1378 {
1379     a->frac <<= c;
1380 }
1381
1382 static void frac128_shl(FloatParts128 *a, int c)
1383 {
1384     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1385
1386     if (c & 64) {
1387         a0 = a1, a1 = 0;
1388     }
1389
1390     c &= 63;
1391     if (c) {
1392         a0 = shl_double(a0, a1, c);
1393         a1 = a1 << c;
1394     }
1395
1396     a->frac_hi = a0;
1397     a->frac_lo = a1;
1398 }
1399
1400 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1401
1402 static void frac64_shr(FloatParts64 *a, int c)
1403 {
1404     a->frac >>= c;
1405 }
1406
1407 static void frac128_shr(FloatParts128 *a, int c)
1408 {
1409     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1410
1411     if (c & 64) {
1412         a1 = a0, a0 = 0;
1413     }
1414
1415     c &= 63;
1416     if (c) {
1417         a1 = shr_double(a0, a1, c);
1418         a0 = a0 >> c;
1419     }
1420
1421     a->frac_hi = a0;
1422     a->frac_lo = a1;
1423 }
1424
1425 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1426
1427 static void frac64_shrjam(FloatParts64 *a, int c)
1428 {
1429     uint64_t a0 = a->frac;
1430
1431     if (likely(c != 0)) {
1432         if (likely(c < 64)) {
1433             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1434         } else {
1435             a0 = a0 != 0;
1436         }
1437         a->frac = a0;
1438     }
1439 }
1440
1441 static void frac128_shrjam(FloatParts128 *a, int c)
1442 {
1443     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1444     uint64_t sticky = 0;
1445
1446     if (unlikely(c == 0)) {
1447         return;
1448     } else if (likely(c < 64)) {
1449         /* nothing */
1450     } else if (likely(c < 128)) {
1451         sticky = a1;
1452         a1 = a0;
1453         a0 = 0;
1454         c &= 63;
1455         if (c == 0) {
1456             goto done;
1457         }
1458     } else {
1459         sticky = a0 | a1;
1460         a0 = a1 = 0;
1461         goto done;
1462     }
1463
1464     sticky |= shr_double(a1, 0, c);
1465     a1 = shr_double(a0, a1, c);
1466     a0 = a0 >> c;
1467
1468  done:
1469     a->frac_lo = a1 | (sticky != 0);
1470     a->frac_hi = a0;
1471 }
1472
1473 static void frac256_shrjam(FloatParts256 *a, int c)
1474 {
1475     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1476     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1477     uint64_t sticky = 0;
1478
1479     if (unlikely(c == 0)) {
1480         return;
1481     } else if (likely(c < 64)) {
1482         /* nothing */
1483     } else if (likely(c < 256)) {
1484         if (unlikely(c & 128)) {
1485             sticky |= a2 | a3;
1486             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1487         }
1488         if (unlikely(c & 64)) {
1489             sticky |= a3;
1490             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1491         }
1492         c &= 63;
1493         if (c == 0) {
1494             goto done;
1495         }
1496     } else {
1497         sticky = a0 | a1 | a2 | a3;
1498         a0 = a1 = a2 = a3 = 0;
1499         goto done;
1500     }
1501
1502     sticky |= shr_double(a3, 0, c);
1503     a3 = shr_double(a2, a3, c);
1504     a2 = shr_double(a1, a2, c);
1505     a1 = shr_double(a0, a1, c);
1506     a0 = a0 >> c;
1507
1508  done:
1509     a->frac_lo = a3 | (sticky != 0);
1510     a->frac_lm = a2;
1511     a->frac_hm = a1;
1512     a->frac_hi = a0;
1513 }
1514
1515 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1516
1517 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1518 {
1519     return usub64_overflow(a->frac, b->frac, &r->frac);
1520 }
1521
1522 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1523 {
1524     bool c = 0;
1525     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1526     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1527     return c;
1528 }
1529
1530 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1531 {
1532     bool c = 0;
1533     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1534     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1535     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1536     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1537     return c;
1538 }
1539
1540 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1541
1542 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1543 {
1544     r->frac = a->frac_hi | (a->frac_lo != 0);
1545 }
1546
1547 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1548 {
1549     r->frac_hi = a->frac_hi;
1550     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1551 }
1552
1553 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1554
1555 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1556 {
1557     r->frac_hi = a->frac;
1558     r->frac_lo = 0;
1559 }
1560
1561 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1562 {
1563     r->frac_hi = a->frac_hi;
1564     r->frac_hm = a->frac_lo;
1565     r->frac_lm = 0;
1566     r->frac_lo = 0;
1567 }
1568
1569 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1570
1571 /*
1572  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1573  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1574  * and thus MIT licenced.
1575  */
1576 static const uint16_t rsqrt_tab[128] = {
1577     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1578     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1579     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1580     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1581     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1582     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1583     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1584     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1585     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1586     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1587     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1588     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1589     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1590     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1591     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1592     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1593 };
1594
1595 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1596 #define FloatPartsN    glue(FloatParts,N)
1597 #define FloatPartsW    glue(FloatParts,W)
1598
1599 #define N 64
1600 #define W 128
1601
1602 #include "softfloat-parts-addsub.c.inc"
1603 #include "softfloat-parts.c.inc"
1604
1605 #undef  N
1606 #undef  W
1607 #define N 128
1608 #define W 256
1609
1610 #include "softfloat-parts-addsub.c.inc"
1611 #include "softfloat-parts.c.inc"
1612
1613 #undef  N
1614 #undef  W
1615 #define N            256
1616
1617 #include "softfloat-parts-addsub.c.inc"
1618
1619 #undef  N
1620 #undef  W
1621 #undef  partsN
1622 #undef  FloatPartsN
1623 #undef  FloatPartsW
1624
1625 /*
1626  * Pack/unpack routines with a specific FloatFmt.
1627  */
1628
1629 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1630                                       float_status *s, const FloatFmt *params)
1631 {
1632     float16_unpack_raw(p, f);
1633     parts_canonicalize(p, s, params);
1634 }
1635
1636 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1637                                      float_status *s)
1638 {
1639     float16a_unpack_canonical(p, f, s, &float16_params);
1640 }
1641
1642 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1643                                       float_status *s)
1644 {
1645     bfloat16_unpack_raw(p, f);
1646     parts_canonicalize(p, s, &bfloat16_params);
1647 }
1648
1649 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1650                                              float_status *s,
1651                                              const FloatFmt *params)
1652 {
1653     parts_uncanon(p, s, params);
1654     return float16_pack_raw(p);
1655 }
1656
1657 static float16 float16_round_pack_canonical(FloatParts64 *p,
1658                                             float_status *s)
1659 {
1660     return float16a_round_pack_canonical(p, s, &float16_params);
1661 }
1662
1663 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1664                                               float_status *s)
1665 {
1666     parts_uncanon(p, s, &bfloat16_params);
1667     return bfloat16_pack_raw(p);
1668 }
1669
1670 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1671                                      float_status *s)
1672 {
1673     float32_unpack_raw(p, f);
1674     parts_canonicalize(p, s, &float32_params);
1675 }
1676
1677 static float32 float32_round_pack_canonical(FloatParts64 *p,
1678                                             float_status *s)
1679 {
1680     parts_uncanon(p, s, &float32_params);
1681     return float32_pack_raw(p);
1682 }
1683
1684 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1685                                      float_status *s)
1686 {
1687     float64_unpack_raw(p, f);
1688     parts_canonicalize(p, s, &float64_params);
1689 }
1690
1691 static float64 float64_round_pack_canonical(FloatParts64 *p,
1692                                             float_status *s)
1693 {
1694     parts_uncanon(p, s, &float64_params);
1695     return float64_pack_raw(p);
1696 }
1697
1698 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1699                                                float_status *s)
1700 {
1701     parts_uncanon(p, s, &float32_params);
1702
1703     /*
1704      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1705      * We need to adjust the fraction higher so that the least N bits are
1706      * zero, and the fraction is adjacent to the float64 implicit bit.
1707      */
1708     switch (p->cls) {
1709     case float_class_normal:
1710         if (unlikely(p->exp == 0)) {
1711             /*
1712              * The result is denormal for float32, but can be represented
1713              * in normalized form for float64.  Adjust, per canonicalize.
1714              */
1715             int shift = frac_normalize(p);
1716             p->exp = (float32_params.frac_shift -
1717                       float32_params.exp_bias - shift + 1 +
1718                       float64_params.exp_bias);
1719             frac_shr(p, float64_params.frac_shift);
1720         } else {
1721             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1722             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1723         }
1724         break;
1725     case float_class_snan:
1726     case float_class_qnan:
1727         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1728         p->exp = float64_params.exp_max;
1729         break;
1730     case float_class_inf:
1731         p->exp = float64_params.exp_max;
1732         break;
1733     case float_class_zero:
1734         break;
1735     default:
1736         g_assert_not_reached();
1737     }
1738
1739     return float64_pack_raw(p);
1740 }
1741
1742 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1743                                       float_status *s)
1744 {
1745     float128_unpack_raw(p, f);
1746     parts_canonicalize(p, s, &float128_params);
1747 }
1748
1749 static float128 float128_round_pack_canonical(FloatParts128 *p,
1750                                               float_status *s)
1751 {
1752     parts_uncanon(p, s, &float128_params);
1753     return float128_pack_raw(p);
1754 }
1755
1756 /* Returns false if the encoding is invalid. */
1757 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1758                                       float_status *s)
1759 {
1760     /* Ensure rounding precision is set before beginning. */
1761     switch (s->floatx80_rounding_precision) {
1762     case floatx80_precision_x:
1763     case floatx80_precision_d:
1764     case floatx80_precision_s:
1765         break;
1766     default:
1767         g_assert_not_reached();
1768     }
1769
1770     if (unlikely(floatx80_invalid_encoding(f))) {
1771         float_raise(float_flag_invalid, s);
1772         return false;
1773     }
1774
1775     floatx80_unpack_raw(p, f);
1776
1777     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1778         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1779     } else {
1780         /* The explicit integer bit is ignored, after invalid checks. */
1781         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1782         p->cls = (p->frac_hi == 0 ? float_class_inf
1783                   : parts_is_snan_frac(p->frac_hi, s)
1784                   ? float_class_snan : float_class_qnan);
1785     }
1786     return true;
1787 }
1788
1789 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1790                                               float_status *s)
1791 {
1792     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1793     uint64_t frac;
1794     int exp;
1795
1796     switch (p->cls) {
1797     case float_class_normal:
1798         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1799             parts_uncanon_normal(p, s, fmt);
1800             frac = p->frac_hi;
1801             exp = p->exp;
1802         } else {
1803             FloatParts64 p64;
1804
1805             p64.sign = p->sign;
1806             p64.exp = p->exp;
1807             frac_truncjam(&p64, p);
1808             parts_uncanon_normal(&p64, s, fmt);
1809             frac = p64.frac;
1810             exp = p64.exp;
1811         }
1812         if (exp != fmt->exp_max) {
1813             break;
1814         }
1815         /* rounded to inf -- fall through to set frac correctly */
1816
1817     case float_class_inf:
1818         /* x86 and m68k differ in the setting of the integer bit. */
1819         frac = floatx80_infinity_low;
1820         exp = fmt->exp_max;
1821         break;
1822
1823     case float_class_zero:
1824         frac = 0;
1825         exp = 0;
1826         break;
1827
1828     case float_class_snan:
1829     case float_class_qnan:
1830         /* NaNs have the integer bit set. */
1831         frac = p->frac_hi | (1ull << 63);
1832         exp = fmt->exp_max;
1833         break;
1834
1835     default:
1836         g_assert_not_reached();
1837     }
1838
1839     return packFloatx80(p->sign, exp, frac);
1840 }
1841
1842 /*
1843  * Addition and subtraction
1844  */
1845
1846 static float16 QEMU_FLATTEN
1847 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1848 {
1849     FloatParts64 pa, pb, *pr;
1850
1851     float16_unpack_canonical(&pa, a, status);
1852     float16_unpack_canonical(&pb, b, status);
1853     pr = parts_addsub(&pa, &pb, status, subtract);
1854
1855     return float16_round_pack_canonical(pr, status);
1856 }
1857
1858 float16 float16_add(float16 a, float16 b, float_status *status)
1859 {
1860     return float16_addsub(a, b, status, false);
1861 }
1862
1863 float16 float16_sub(float16 a, float16 b, float_status *status)
1864 {
1865     return float16_addsub(a, b, status, true);
1866 }
1867
1868 static float32 QEMU_SOFTFLOAT_ATTR
1869 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1870 {
1871     FloatParts64 pa, pb, *pr;
1872
1873     float32_unpack_canonical(&pa, a, status);
1874     float32_unpack_canonical(&pb, b, status);
1875     pr = parts_addsub(&pa, &pb, status, subtract);
1876
1877     return float32_round_pack_canonical(pr, status);
1878 }
1879
1880 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1881 {
1882     return soft_f32_addsub(a, b, status, false);
1883 }
1884
1885 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1886 {
1887     return soft_f32_addsub(a, b, status, true);
1888 }
1889
1890 static float64 QEMU_SOFTFLOAT_ATTR
1891 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1892 {
1893     FloatParts64 pa, pb, *pr;
1894
1895     float64_unpack_canonical(&pa, a, status);
1896     float64_unpack_canonical(&pb, b, status);
1897     pr = parts_addsub(&pa, &pb, status, subtract);
1898
1899     return float64_round_pack_canonical(pr, status);
1900 }
1901
1902 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1903 {
1904     return soft_f64_addsub(a, b, status, false);
1905 }
1906
1907 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1908 {
1909     return soft_f64_addsub(a, b, status, true);
1910 }
1911
1912 static float hard_f32_add(float a, float b)
1913 {
1914     return a + b;
1915 }
1916
1917 static float hard_f32_sub(float a, float b)
1918 {
1919     return a - b;
1920 }
1921
1922 static double hard_f64_add(double a, double b)
1923 {
1924     return a + b;
1925 }
1926
1927 static double hard_f64_sub(double a, double b)
1928 {
1929     return a - b;
1930 }
1931
1932 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1933 {
1934     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1935         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1936     }
1937     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1938 }
1939
1940 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1941 {
1942     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1943         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1944     } else {
1945         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1946     }
1947 }
1948
1949 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1950                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1951 {
1952     return float32_gen2(a, b, s, hard, soft,
1953                         f32_is_zon2, f32_addsubmul_post);
1954 }
1955
1956 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1957                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1958 {
1959     return float64_gen2(a, b, s, hard, soft,
1960                         f64_is_zon2, f64_addsubmul_post);
1961 }
1962
1963 float32 QEMU_FLATTEN
1964 float32_add(float32 a, float32 b, float_status *s)
1965 {
1966     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1967 }
1968
1969 float32 QEMU_FLATTEN
1970 float32_sub(float32 a, float32 b, float_status *s)
1971 {
1972     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1973 }
1974
1975 float64 QEMU_FLATTEN
1976 float64_add(float64 a, float64 b, float_status *s)
1977 {
1978     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1979 }
1980
1981 float64 QEMU_FLATTEN
1982 float64_sub(float64 a, float64 b, float_status *s)
1983 {
1984     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1985 }
1986
1987 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1988                                  bool subtract)
1989 {
1990     FloatParts64 pa, pb, *pr;
1991
1992     float64_unpack_canonical(&pa, a, status);
1993     float64_unpack_canonical(&pb, b, status);
1994     pr = parts_addsub(&pa, &pb, status, subtract);
1995
1996     return float64r32_round_pack_canonical(pr, status);
1997 }
1998
1999 float64 float64r32_add(float64 a, float64 b, float_status *status)
2000 {
2001     return float64r32_addsub(a, b, status, false);
2002 }
2003
2004 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2005 {
2006     return float64r32_addsub(a, b, status, true);
2007 }
2008
2009 static bfloat16 QEMU_FLATTEN
2010 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013
2014     bfloat16_unpack_canonical(&pa, a, status);
2015     bfloat16_unpack_canonical(&pb, b, status);
2016     pr = parts_addsub(&pa, &pb, status, subtract);
2017
2018     return bfloat16_round_pack_canonical(pr, status);
2019 }
2020
2021 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2022 {
2023     return bfloat16_addsub(a, b, status, false);
2024 }
2025
2026 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2027 {
2028     return bfloat16_addsub(a, b, status, true);
2029 }
2030
2031 static float128 QEMU_FLATTEN
2032 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2033 {
2034     FloatParts128 pa, pb, *pr;
2035
2036     float128_unpack_canonical(&pa, a, status);
2037     float128_unpack_canonical(&pb, b, status);
2038     pr = parts_addsub(&pa, &pb, status, subtract);
2039
2040     return float128_round_pack_canonical(pr, status);
2041 }
2042
2043 float128 float128_add(float128 a, float128 b, float_status *status)
2044 {
2045     return float128_addsub(a, b, status, false);
2046 }
2047
2048 float128 float128_sub(float128 a, float128 b, float_status *status)
2049 {
2050     return float128_addsub(a, b, status, true);
2051 }
2052
2053 static floatx80 QEMU_FLATTEN
2054 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2055 {
2056     FloatParts128 pa, pb, *pr;
2057
2058     if (!floatx80_unpack_canonical(&pa, a, status) ||
2059         !floatx80_unpack_canonical(&pb, b, status)) {
2060         return floatx80_default_nan(status);
2061     }
2062
2063     pr = parts_addsub(&pa, &pb, status, subtract);
2064     return floatx80_round_pack_canonical(pr, status);
2065 }
2066
2067 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2068 {
2069     return floatx80_addsub(a, b, status, false);
2070 }
2071
2072 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2073 {
2074     return floatx80_addsub(a, b, status, true);
2075 }
2076
2077 /*
2078  * Multiplication
2079  */
2080
2081 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2082 {
2083     FloatParts64 pa, pb, *pr;
2084
2085     float16_unpack_canonical(&pa, a, status);
2086     float16_unpack_canonical(&pb, b, status);
2087     pr = parts_mul(&pa, &pb, status);
2088
2089     return float16_round_pack_canonical(pr, status);
2090 }
2091
2092 static float32 QEMU_SOFTFLOAT_ATTR
2093 soft_f32_mul(float32 a, float32 b, float_status *status)
2094 {
2095     FloatParts64 pa, pb, *pr;
2096
2097     float32_unpack_canonical(&pa, a, status);
2098     float32_unpack_canonical(&pb, b, status);
2099     pr = parts_mul(&pa, &pb, status);
2100
2101     return float32_round_pack_canonical(pr, status);
2102 }
2103
2104 static float64 QEMU_SOFTFLOAT_ATTR
2105 soft_f64_mul(float64 a, float64 b, float_status *status)
2106 {
2107     FloatParts64 pa, pb, *pr;
2108
2109     float64_unpack_canonical(&pa, a, status);
2110     float64_unpack_canonical(&pb, b, status);
2111     pr = parts_mul(&pa, &pb, status);
2112
2113     return float64_round_pack_canonical(pr, status);
2114 }
2115
2116 static float hard_f32_mul(float a, float b)
2117 {
2118     return a * b;
2119 }
2120
2121 static double hard_f64_mul(double a, double b)
2122 {
2123     return a * b;
2124 }
2125
2126 float32 QEMU_FLATTEN
2127 float32_mul(float32 a, float32 b, float_status *s)
2128 {
2129     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2130                         f32_is_zon2, f32_addsubmul_post);
2131 }
2132
2133 float64 QEMU_FLATTEN
2134 float64_mul(float64 a, float64 b, float_status *s)
2135 {
2136     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2137                         f64_is_zon2, f64_addsubmul_post);
2138 }
2139
2140 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2141 {
2142     FloatParts64 pa, pb, *pr;
2143
2144     float64_unpack_canonical(&pa, a, status);
2145     float64_unpack_canonical(&pb, b, status);
2146     pr = parts_mul(&pa, &pb, status);
2147
2148     return float64r32_round_pack_canonical(pr, status);
2149 }
2150
2151 bfloat16 QEMU_FLATTEN
2152 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2153 {
2154     FloatParts64 pa, pb, *pr;
2155
2156     bfloat16_unpack_canonical(&pa, a, status);
2157     bfloat16_unpack_canonical(&pb, b, status);
2158     pr = parts_mul(&pa, &pb, status);
2159
2160     return bfloat16_round_pack_canonical(pr, status);
2161 }
2162
2163 float128 QEMU_FLATTEN
2164 float128_mul(float128 a, float128 b, float_status *status)
2165 {
2166     FloatParts128 pa, pb, *pr;
2167
2168     float128_unpack_canonical(&pa, a, status);
2169     float128_unpack_canonical(&pb, b, status);
2170     pr = parts_mul(&pa, &pb, status);
2171
2172     return float128_round_pack_canonical(pr, status);
2173 }
2174
2175 floatx80 QEMU_FLATTEN
2176 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2177 {
2178     FloatParts128 pa, pb, *pr;
2179
2180     if (!floatx80_unpack_canonical(&pa, a, status) ||
2181         !floatx80_unpack_canonical(&pb, b, status)) {
2182         return floatx80_default_nan(status);
2183     }
2184
2185     pr = parts_mul(&pa, &pb, status);
2186     return floatx80_round_pack_canonical(pr, status);
2187 }
2188
2189 /*
2190  * Fused multiply-add
2191  */
2192
2193 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2194                                     int flags, float_status *status)
2195 {
2196     FloatParts64 pa, pb, pc, *pr;
2197
2198     float16_unpack_canonical(&pa, a, status);
2199     float16_unpack_canonical(&pb, b, status);
2200     float16_unpack_canonical(&pc, c, status);
2201     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2202
2203     return float16_round_pack_canonical(pr, status);
2204 }
2205
2206 static float32 QEMU_SOFTFLOAT_ATTR
2207 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2208                 float_status *status)
2209 {
2210     FloatParts64 pa, pb, pc, *pr;
2211
2212     float32_unpack_canonical(&pa, a, status);
2213     float32_unpack_canonical(&pb, b, status);
2214     float32_unpack_canonical(&pc, c, status);
2215     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2216
2217     return float32_round_pack_canonical(pr, status);
2218 }
2219
2220 static float64 QEMU_SOFTFLOAT_ATTR
2221 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2222                 float_status *status)
2223 {
2224     FloatParts64 pa, pb, pc, *pr;
2225
2226     float64_unpack_canonical(&pa, a, status);
2227     float64_unpack_canonical(&pb, b, status);
2228     float64_unpack_canonical(&pc, c, status);
2229     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2230
2231     return float64_round_pack_canonical(pr, status);
2232 }
2233
2234 static bool force_soft_fma;
2235
2236 float32 QEMU_FLATTEN
2237 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2238 {
2239     union_float32 ua, ub, uc, ur;
2240
2241     ua.s = xa;
2242     ub.s = xb;
2243     uc.s = xc;
2244
2245     if (unlikely(!can_use_fpu(s))) {
2246         goto soft;
2247     }
2248     if (unlikely(flags & float_muladd_halve_result)) {
2249         goto soft;
2250     }
2251
2252     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2253     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2254         goto soft;
2255     }
2256
2257     if (unlikely(force_soft_fma)) {
2258         goto soft;
2259     }
2260
2261     /*
2262      * When (a || b) == 0, there's no need to check for under/over flow,
2263      * since we know the addend is (normal || 0) and the product is 0.
2264      */
2265     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2266         union_float32 up;
2267         bool prod_sign;
2268
2269         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2270         prod_sign ^= !!(flags & float_muladd_negate_product);
2271         up.s = float32_set_sign(float32_zero, prod_sign);
2272
2273         if (flags & float_muladd_negate_c) {
2274             uc.h = -uc.h;
2275         }
2276         ur.h = up.h + uc.h;
2277     } else {
2278         union_float32 ua_orig = ua;
2279         union_float32 uc_orig = uc;
2280
2281         if (flags & float_muladd_negate_product) {
2282             ua.h = -ua.h;
2283         }
2284         if (flags & float_muladd_negate_c) {
2285             uc.h = -uc.h;
2286         }
2287
2288         ur.h = fmaf(ua.h, ub.h, uc.h);
2289
2290         if (unlikely(f32_is_inf(ur))) {
2291             float_raise(float_flag_overflow, s);
2292         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2293             ua = ua_orig;
2294             uc = uc_orig;
2295             goto soft;
2296         }
2297     }
2298     if (flags & float_muladd_negate_result) {
2299         return float32_chs(ur.s);
2300     }
2301     return ur.s;
2302
2303  soft:
2304     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2305 }
2306
2307 float64 QEMU_FLATTEN
2308 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2309 {
2310     union_float64 ua, ub, uc, ur;
2311
2312     ua.s = xa;
2313     ub.s = xb;
2314     uc.s = xc;
2315
2316     if (unlikely(!can_use_fpu(s))) {
2317         goto soft;
2318     }
2319     if (unlikely(flags & float_muladd_halve_result)) {
2320         goto soft;
2321     }
2322
2323     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2324     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2325         goto soft;
2326     }
2327
2328     if (unlikely(force_soft_fma)) {
2329         goto soft;
2330     }
2331
2332     /*
2333      * When (a || b) == 0, there's no need to check for under/over flow,
2334      * since we know the addend is (normal || 0) and the product is 0.
2335      */
2336     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2337         union_float64 up;
2338         bool prod_sign;
2339
2340         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2341         prod_sign ^= !!(flags & float_muladd_negate_product);
2342         up.s = float64_set_sign(float64_zero, prod_sign);
2343
2344         if (flags & float_muladd_negate_c) {
2345             uc.h = -uc.h;
2346         }
2347         ur.h = up.h + uc.h;
2348     } else {
2349         union_float64 ua_orig = ua;
2350         union_float64 uc_orig = uc;
2351
2352         if (flags & float_muladd_negate_product) {
2353             ua.h = -ua.h;
2354         }
2355         if (flags & float_muladd_negate_c) {
2356             uc.h = -uc.h;
2357         }
2358
2359         ur.h = fma(ua.h, ub.h, uc.h);
2360
2361         if (unlikely(f64_is_inf(ur))) {
2362             float_raise(float_flag_overflow, s);
2363         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2364             ua = ua_orig;
2365             uc = uc_orig;
2366             goto soft;
2367         }
2368     }
2369     if (flags & float_muladd_negate_result) {
2370         return float64_chs(ur.s);
2371     }
2372     return ur.s;
2373
2374  soft:
2375     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2376 }
2377
2378 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2379                           int flags, float_status *status)
2380 {
2381     FloatParts64 pa, pb, pc, *pr;
2382
2383     float64_unpack_canonical(&pa, a, status);
2384     float64_unpack_canonical(&pb, b, status);
2385     float64_unpack_canonical(&pc, c, status);
2386     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2387
2388     return float64r32_round_pack_canonical(pr, status);
2389 }
2390
2391 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2392                                       int flags, float_status *status)
2393 {
2394     FloatParts64 pa, pb, pc, *pr;
2395
2396     bfloat16_unpack_canonical(&pa, a, status);
2397     bfloat16_unpack_canonical(&pb, b, status);
2398     bfloat16_unpack_canonical(&pc, c, status);
2399     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2400
2401     return bfloat16_round_pack_canonical(pr, status);
2402 }
2403
2404 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2405                                       int flags, float_status *status)
2406 {
2407     FloatParts128 pa, pb, pc, *pr;
2408
2409     float128_unpack_canonical(&pa, a, status);
2410     float128_unpack_canonical(&pb, b, status);
2411     float128_unpack_canonical(&pc, c, status);
2412     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2413
2414     return float128_round_pack_canonical(pr, status);
2415 }
2416
2417 /*
2418  * Division
2419  */
2420
2421 float16 float16_div(float16 a, float16 b, float_status *status)
2422 {
2423     FloatParts64 pa, pb, *pr;
2424
2425     float16_unpack_canonical(&pa, a, status);
2426     float16_unpack_canonical(&pb, b, status);
2427     pr = parts_div(&pa, &pb, status);
2428
2429     return float16_round_pack_canonical(pr, status);
2430 }
2431
2432 static float32 QEMU_SOFTFLOAT_ATTR
2433 soft_f32_div(float32 a, float32 b, float_status *status)
2434 {
2435     FloatParts64 pa, pb, *pr;
2436
2437     float32_unpack_canonical(&pa, a, status);
2438     float32_unpack_canonical(&pb, b, status);
2439     pr = parts_div(&pa, &pb, status);
2440
2441     return float32_round_pack_canonical(pr, status);
2442 }
2443
2444 static float64 QEMU_SOFTFLOAT_ATTR
2445 soft_f64_div(float64 a, float64 b, float_status *status)
2446 {
2447     FloatParts64 pa, pb, *pr;
2448
2449     float64_unpack_canonical(&pa, a, status);
2450     float64_unpack_canonical(&pb, b, status);
2451     pr = parts_div(&pa, &pb, status);
2452
2453     return float64_round_pack_canonical(pr, status);
2454 }
2455
2456 static float hard_f32_div(float a, float b)
2457 {
2458     return a / b;
2459 }
2460
2461 static double hard_f64_div(double a, double b)
2462 {
2463     return a / b;
2464 }
2465
2466 static bool f32_div_pre(union_float32 a, union_float32 b)
2467 {
2468     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2469         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2470                fpclassify(b.h) == FP_NORMAL;
2471     }
2472     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2473 }
2474
2475 static bool f64_div_pre(union_float64 a, union_float64 b)
2476 {
2477     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2478         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2479                fpclassify(b.h) == FP_NORMAL;
2480     }
2481     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2482 }
2483
2484 static bool f32_div_post(union_float32 a, union_float32 b)
2485 {
2486     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2487         return fpclassify(a.h) != FP_ZERO;
2488     }
2489     return !float32_is_zero(a.s);
2490 }
2491
2492 static bool f64_div_post(union_float64 a, union_float64 b)
2493 {
2494     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2495         return fpclassify(a.h) != FP_ZERO;
2496     }
2497     return !float64_is_zero(a.s);
2498 }
2499
2500 float32 QEMU_FLATTEN
2501 float32_div(float32 a, float32 b, float_status *s)
2502 {
2503     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2504                         f32_div_pre, f32_div_post);
2505 }
2506
2507 float64 QEMU_FLATTEN
2508 float64_div(float64 a, float64 b, float_status *s)
2509 {
2510     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2511                         f64_div_pre, f64_div_post);
2512 }
2513
2514 float64 float64r32_div(float64 a, float64 b, float_status *status)
2515 {
2516     FloatParts64 pa, pb, *pr;
2517
2518     float64_unpack_canonical(&pa, a, status);
2519     float64_unpack_canonical(&pb, b, status);
2520     pr = parts_div(&pa, &pb, status);
2521
2522     return float64r32_round_pack_canonical(pr, status);
2523 }
2524
2525 bfloat16 QEMU_FLATTEN
2526 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2527 {
2528     FloatParts64 pa, pb, *pr;
2529
2530     bfloat16_unpack_canonical(&pa, a, status);
2531     bfloat16_unpack_canonical(&pb, b, status);
2532     pr = parts_div(&pa, &pb, status);
2533
2534     return bfloat16_round_pack_canonical(pr, status);
2535 }
2536
2537 float128 QEMU_FLATTEN
2538 float128_div(float128 a, float128 b, float_status *status)
2539 {
2540     FloatParts128 pa, pb, *pr;
2541
2542     float128_unpack_canonical(&pa, a, status);
2543     float128_unpack_canonical(&pb, b, status);
2544     pr = parts_div(&pa, &pb, status);
2545
2546     return float128_round_pack_canonical(pr, status);
2547 }
2548
2549 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2550 {
2551     FloatParts128 pa, pb, *pr;
2552
2553     if (!floatx80_unpack_canonical(&pa, a, status) ||
2554         !floatx80_unpack_canonical(&pb, b, status)) {
2555         return floatx80_default_nan(status);
2556     }
2557
2558     pr = parts_div(&pa, &pb, status);
2559     return floatx80_round_pack_canonical(pr, status);
2560 }
2561
2562 /*
2563  * Remainder
2564  */
2565
2566 float32 float32_rem(float32 a, float32 b, float_status *status)
2567 {
2568     FloatParts64 pa, pb, *pr;
2569
2570     float32_unpack_canonical(&pa, a, status);
2571     float32_unpack_canonical(&pb, b, status);
2572     pr = parts_modrem(&pa, &pb, NULL, status);
2573
2574     return float32_round_pack_canonical(pr, status);
2575 }
2576
2577 float64 float64_rem(float64 a, float64 b, float_status *status)
2578 {
2579     FloatParts64 pa, pb, *pr;
2580
2581     float64_unpack_canonical(&pa, a, status);
2582     float64_unpack_canonical(&pb, b, status);
2583     pr = parts_modrem(&pa, &pb, NULL, status);
2584
2585     return float64_round_pack_canonical(pr, status);
2586 }
2587
2588 float128 float128_rem(float128 a, float128 b, float_status *status)
2589 {
2590     FloatParts128 pa, pb, *pr;
2591
2592     float128_unpack_canonical(&pa, a, status);
2593     float128_unpack_canonical(&pb, b, status);
2594     pr = parts_modrem(&pa, &pb, NULL, status);
2595
2596     return float128_round_pack_canonical(pr, status);
2597 }
2598
2599 /*
2600  * Returns the remainder of the extended double-precision floating-point value
2601  * `a' with respect to the corresponding value `b'.
2602  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2603  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2604  * the remainder based on truncating the quotient toward zero instead and
2605  * *quotient is set to the low 64 bits of the absolute value of the integer
2606  * quotient.
2607  */
2608 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2609                          uint64_t *quotient, float_status *status)
2610 {
2611     FloatParts128 pa, pb, *pr;
2612
2613     *quotient = 0;
2614     if (!floatx80_unpack_canonical(&pa, a, status) ||
2615         !floatx80_unpack_canonical(&pb, b, status)) {
2616         return floatx80_default_nan(status);
2617     }
2618     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2619
2620     return floatx80_round_pack_canonical(pr, status);
2621 }
2622
2623 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2624 {
2625     uint64_t quotient;
2626     return floatx80_modrem(a, b, false, &quotient, status);
2627 }
2628
2629 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2630 {
2631     uint64_t quotient;
2632     return floatx80_modrem(a, b, true, &quotient, status);
2633 }
2634
2635 /*
2636  * Float to Float conversions
2637  *
2638  * Returns the result of converting one float format to another. The
2639  * conversion is performed according to the IEC/IEEE Standard for
2640  * Binary Floating-Point Arithmetic.
2641  *
2642  * Usually this only needs to take care of raising invalid exceptions
2643  * and handling the conversion on NaNs.
2644  */
2645
2646 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2647 {
2648     switch (a->cls) {
2649     case float_class_snan:
2650         float_raise(float_flag_invalid_snan, s);
2651         /* fall through */
2652     case float_class_qnan:
2653         /*
2654          * There is no NaN in the destination format.  Raise Invalid
2655          * and return a zero with the sign of the input NaN.
2656          */
2657         float_raise(float_flag_invalid, s);
2658         a->cls = float_class_zero;
2659         break;
2660
2661     case float_class_inf:
2662         /*
2663          * There is no Inf in the destination format.  Raise Invalid
2664          * and return the maximum normal with the correct sign.
2665          */
2666         float_raise(float_flag_invalid, s);
2667         a->cls = float_class_normal;
2668         a->exp = float16_params_ahp.exp_max;
2669         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2670                                   float16_params_ahp.frac_size + 1);
2671         break;
2672
2673     case float_class_normal:
2674     case float_class_zero:
2675         break;
2676
2677     default:
2678         g_assert_not_reached();
2679     }
2680 }
2681
2682 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2683 {
2684     if (is_nan(a->cls)) {
2685         parts_return_nan(a, s);
2686     }
2687 }
2688
2689 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2690 {
2691     if (is_nan(a->cls)) {
2692         parts_return_nan(a, s);
2693     }
2694 }
2695
2696 #define parts_float_to_float(P, S) \
2697     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2698
2699 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2700                                         float_status *s)
2701 {
2702     a->cls = b->cls;
2703     a->sign = b->sign;
2704     a->exp = b->exp;
2705
2706     if (a->cls == float_class_normal) {
2707         frac_truncjam(a, b);
2708     } else if (is_nan(a->cls)) {
2709         /* Discard the low bits of the NaN. */
2710         a->frac = b->frac_hi;
2711         parts_return_nan(a, s);
2712     }
2713 }
2714
2715 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2716                                        float_status *s)
2717 {
2718     a->cls = b->cls;
2719     a->sign = b->sign;
2720     a->exp = b->exp;
2721     frac_widen(a, b);
2722
2723     if (is_nan(a->cls)) {
2724         parts_return_nan(a, s);
2725     }
2726 }
2727
2728 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2729 {
2730     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2731     FloatParts64 p;
2732
2733     float16a_unpack_canonical(&p, a, s, fmt16);
2734     parts_float_to_float(&p, s);
2735     return float32_round_pack_canonical(&p, s);
2736 }
2737
2738 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2739 {
2740     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2741     FloatParts64 p;
2742
2743     float16a_unpack_canonical(&p, a, s, fmt16);
2744     parts_float_to_float(&p, s);
2745     return float64_round_pack_canonical(&p, s);
2746 }
2747
2748 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2749 {
2750     FloatParts64 p;
2751     const FloatFmt *fmt;
2752
2753     float32_unpack_canonical(&p, a, s);
2754     if (ieee) {
2755         parts_float_to_float(&p, s);
2756         fmt = &float16_params;
2757     } else {
2758         parts_float_to_ahp(&p, s);
2759         fmt = &float16_params_ahp;
2760     }
2761     return float16a_round_pack_canonical(&p, s, fmt);
2762 }
2763
2764 static float64 QEMU_SOFTFLOAT_ATTR
2765 soft_float32_to_float64(float32 a, float_status *s)
2766 {
2767     FloatParts64 p;
2768
2769     float32_unpack_canonical(&p, a, s);
2770     parts_float_to_float(&p, s);
2771     return float64_round_pack_canonical(&p, s);
2772 }
2773
2774 float64 float32_to_float64(float32 a, float_status *s)
2775 {
2776     if (likely(float32_is_normal(a))) {
2777         /* Widening conversion can never produce inexact results.  */
2778         union_float32 uf;
2779         union_float64 ud;
2780         uf.s = a;
2781         ud.h = uf.h;
2782         return ud.s;
2783     } else if (float32_is_zero(a)) {
2784         return float64_set_sign(float64_zero, float32_is_neg(a));
2785     } else {
2786         return soft_float32_to_float64(a, s);
2787     }
2788 }
2789
2790 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2791 {
2792     FloatParts64 p;
2793     const FloatFmt *fmt;
2794
2795     float64_unpack_canonical(&p, a, s);
2796     if (ieee) {
2797         parts_float_to_float(&p, s);
2798         fmt = &float16_params;
2799     } else {
2800         parts_float_to_ahp(&p, s);
2801         fmt = &float16_params_ahp;
2802     }
2803     return float16a_round_pack_canonical(&p, s, fmt);
2804 }
2805
2806 float32 float64_to_float32(float64 a, float_status *s)
2807 {
2808     FloatParts64 p;
2809
2810     float64_unpack_canonical(&p, a, s);
2811     parts_float_to_float(&p, s);
2812     return float32_round_pack_canonical(&p, s);
2813 }
2814
2815 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2816 {
2817     FloatParts64 p;
2818
2819     bfloat16_unpack_canonical(&p, a, s);
2820     parts_float_to_float(&p, s);
2821     return float32_round_pack_canonical(&p, s);
2822 }
2823
2824 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2825 {
2826     FloatParts64 p;
2827
2828     bfloat16_unpack_canonical(&p, a, s);
2829     parts_float_to_float(&p, s);
2830     return float64_round_pack_canonical(&p, s);
2831 }
2832
2833 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2834 {
2835     FloatParts64 p;
2836
2837     float32_unpack_canonical(&p, a, s);
2838     parts_float_to_float(&p, s);
2839     return bfloat16_round_pack_canonical(&p, s);
2840 }
2841
2842 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2843 {
2844     FloatParts64 p;
2845
2846     float64_unpack_canonical(&p, a, s);
2847     parts_float_to_float(&p, s);
2848     return bfloat16_round_pack_canonical(&p, s);
2849 }
2850
2851 float32 float128_to_float32(float128 a, float_status *s)
2852 {
2853     FloatParts64 p64;
2854     FloatParts128 p128;
2855
2856     float128_unpack_canonical(&p128, a, s);
2857     parts_float_to_float_narrow(&p64, &p128, s);
2858     return float32_round_pack_canonical(&p64, s);
2859 }
2860
2861 float64 float128_to_float64(float128 a, float_status *s)
2862 {
2863     FloatParts64 p64;
2864     FloatParts128 p128;
2865
2866     float128_unpack_canonical(&p128, a, s);
2867     parts_float_to_float_narrow(&p64, &p128, s);
2868     return float64_round_pack_canonical(&p64, s);
2869 }
2870
2871 float128 float32_to_float128(float32 a, float_status *s)
2872 {
2873     FloatParts64 p64;
2874     FloatParts128 p128;
2875
2876     float32_unpack_canonical(&p64, a, s);
2877     parts_float_to_float_widen(&p128, &p64, s);
2878     return float128_round_pack_canonical(&p128, s);
2879 }
2880
2881 float128 float64_to_float128(float64 a, float_status *s)
2882 {
2883     FloatParts64 p64;
2884     FloatParts128 p128;
2885
2886     float64_unpack_canonical(&p64, a, s);
2887     parts_float_to_float_widen(&p128, &p64, s);
2888     return float128_round_pack_canonical(&p128, s);
2889 }
2890
2891 float32 floatx80_to_float32(floatx80 a, float_status *s)
2892 {
2893     FloatParts64 p64;
2894     FloatParts128 p128;
2895
2896     if (floatx80_unpack_canonical(&p128, a, s)) {
2897         parts_float_to_float_narrow(&p64, &p128, s);
2898     } else {
2899         parts_default_nan(&p64, s);
2900     }
2901     return float32_round_pack_canonical(&p64, s);
2902 }
2903
2904 float64 floatx80_to_float64(floatx80 a, float_status *s)
2905 {
2906     FloatParts64 p64;
2907     FloatParts128 p128;
2908
2909     if (floatx80_unpack_canonical(&p128, a, s)) {
2910         parts_float_to_float_narrow(&p64, &p128, s);
2911     } else {
2912         parts_default_nan(&p64, s);
2913     }
2914     return float64_round_pack_canonical(&p64, s);
2915 }
2916
2917 float128 floatx80_to_float128(floatx80 a, float_status *s)
2918 {
2919     FloatParts128 p;
2920
2921     if (floatx80_unpack_canonical(&p, a, s)) {
2922         parts_float_to_float(&p, s);
2923     } else {
2924         parts_default_nan(&p, s);
2925     }
2926     return float128_round_pack_canonical(&p, s);
2927 }
2928
2929 floatx80 float32_to_floatx80(float32 a, float_status *s)
2930 {
2931     FloatParts64 p64;
2932     FloatParts128 p128;
2933
2934     float32_unpack_canonical(&p64, a, s);
2935     parts_float_to_float_widen(&p128, &p64, s);
2936     return floatx80_round_pack_canonical(&p128, s);
2937 }
2938
2939 floatx80 float64_to_floatx80(float64 a, float_status *s)
2940 {
2941     FloatParts64 p64;
2942     FloatParts128 p128;
2943
2944     float64_unpack_canonical(&p64, a, s);
2945     parts_float_to_float_widen(&p128, &p64, s);
2946     return floatx80_round_pack_canonical(&p128, s);
2947 }
2948
2949 floatx80 float128_to_floatx80(float128 a, float_status *s)
2950 {
2951     FloatParts128 p;
2952
2953     float128_unpack_canonical(&p, a, s);
2954     parts_float_to_float(&p, s);
2955     return floatx80_round_pack_canonical(&p, s);
2956 }
2957
2958 /*
2959  * Round to integral value
2960  */
2961
2962 float16 float16_round_to_int(float16 a, float_status *s)
2963 {
2964     FloatParts64 p;
2965
2966     float16_unpack_canonical(&p, a, s);
2967     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2968     return float16_round_pack_canonical(&p, s);
2969 }
2970
2971 float32 float32_round_to_int(float32 a, float_status *s)
2972 {
2973     FloatParts64 p;
2974
2975     float32_unpack_canonical(&p, a, s);
2976     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2977     return float32_round_pack_canonical(&p, s);
2978 }
2979
2980 float64 float64_round_to_int(float64 a, float_status *s)
2981 {
2982     FloatParts64 p;
2983
2984     float64_unpack_canonical(&p, a, s);
2985     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2986     return float64_round_pack_canonical(&p, s);
2987 }
2988
2989 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2990 {
2991     FloatParts64 p;
2992
2993     bfloat16_unpack_canonical(&p, a, s);
2994     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2995     return bfloat16_round_pack_canonical(&p, s);
2996 }
2997
2998 float128 float128_round_to_int(float128 a, float_status *s)
2999 {
3000     FloatParts128 p;
3001
3002     float128_unpack_canonical(&p, a, s);
3003     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3004     return float128_round_pack_canonical(&p, s);
3005 }
3006
3007 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3008 {
3009     FloatParts128 p;
3010
3011     if (!floatx80_unpack_canonical(&p, a, status)) {
3012         return floatx80_default_nan(status);
3013     }
3014
3015     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3016                        &floatx80_params[status->floatx80_rounding_precision]);
3017     return floatx80_round_pack_canonical(&p, status);
3018 }
3019
3020 /*
3021  * Floating-point to signed integer conversions
3022  */
3023
3024 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3025                               float_status *s)
3026 {
3027     FloatParts64 p;
3028
3029     float16_unpack_canonical(&p, a, s);
3030     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3031 }
3032
3033 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3034                                 float_status *s)
3035 {
3036     FloatParts64 p;
3037
3038     float16_unpack_canonical(&p, a, s);
3039     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3040 }
3041
3042 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3043                                 float_status *s)
3044 {
3045     FloatParts64 p;
3046
3047     float16_unpack_canonical(&p, a, s);
3048     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3049 }
3050
3051 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3052                                 float_status *s)
3053 {
3054     FloatParts64 p;
3055
3056     float16_unpack_canonical(&p, a, s);
3057     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3058 }
3059
3060 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3061                                 float_status *s)
3062 {
3063     FloatParts64 p;
3064
3065     float32_unpack_canonical(&p, a, s);
3066     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3067 }
3068
3069 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3070                                 float_status *s)
3071 {
3072     FloatParts64 p;
3073
3074     float32_unpack_canonical(&p, a, s);
3075     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3076 }
3077
3078 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3079                                 float_status *s)
3080 {
3081     FloatParts64 p;
3082
3083     float32_unpack_canonical(&p, a, s);
3084     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3085 }
3086
3087 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3088                                 float_status *s)
3089 {
3090     FloatParts64 p;
3091
3092     float64_unpack_canonical(&p, a, s);
3093     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3094 }
3095
3096 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3097                                 float_status *s)
3098 {
3099     FloatParts64 p;
3100
3101     float64_unpack_canonical(&p, a, s);
3102     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3103 }
3104
3105 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3106                                 float_status *s)
3107 {
3108     FloatParts64 p;
3109
3110     float64_unpack_canonical(&p, a, s);
3111     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3112 }
3113
3114 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3115                                  float_status *s)
3116 {
3117     FloatParts64 p;
3118
3119     bfloat16_unpack_canonical(&p, a, s);
3120     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3121 }
3122
3123 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3124                                  float_status *s)
3125 {
3126     FloatParts64 p;
3127
3128     bfloat16_unpack_canonical(&p, a, s);
3129     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3130 }
3131
3132 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3133                                  float_status *s)
3134 {
3135     FloatParts64 p;
3136
3137     bfloat16_unpack_canonical(&p, a, s);
3138     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3139 }
3140
3141 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3142                                         int scale, float_status *s)
3143 {
3144     FloatParts128 p;
3145
3146     float128_unpack_canonical(&p, a, s);
3147     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3148 }
3149
3150 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3151                                         int scale, float_status *s)
3152 {
3153     FloatParts128 p;
3154
3155     float128_unpack_canonical(&p, a, s);
3156     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3157 }
3158
3159 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3160                                         int scale, float_status *s)
3161 {
3162     int flags = 0;
3163     Int128 r;
3164     FloatParts128 p;
3165
3166     float128_unpack_canonical(&p, a, s);
3167
3168     switch (p.cls) {
3169     case float_class_snan:
3170         flags |= float_flag_invalid_snan;
3171         /* fall through */
3172     case float_class_qnan:
3173         flags |= float_flag_invalid;
3174         r = UINT128_MAX;
3175         break;
3176
3177     case float_class_inf:
3178         flags = float_flag_invalid | float_flag_invalid_cvti;
3179         r = p.sign ? INT128_MIN : INT128_MAX;
3180         break;
3181
3182     case float_class_zero:
3183         return int128_zero();
3184
3185     case float_class_normal:
3186         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3187             flags = float_flag_inexact;
3188         }
3189
3190         if (p.exp < 127) {
3191             int shift = 127 - p.exp;
3192             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3193             if (p.sign) {
3194                 r = int128_neg(r);
3195             }
3196         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3197                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3198             r = INT128_MIN;
3199         } else {
3200             flags = float_flag_invalid | float_flag_invalid_cvti;
3201             r = p.sign ? INT128_MIN : INT128_MAX;
3202         }
3203         break;
3204
3205     default:
3206         g_assert_not_reached();
3207     }
3208
3209     float_raise(flags, s);
3210     return r;
3211 }
3212
3213 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3214                                         int scale, float_status *s)
3215 {
3216     FloatParts128 p;
3217
3218     if (!floatx80_unpack_canonical(&p, a, s)) {
3219         parts_default_nan(&p, s);
3220     }
3221     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3222 }
3223
3224 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3225                                         int scale, float_status *s)
3226 {
3227     FloatParts128 p;
3228
3229     if (!floatx80_unpack_canonical(&p, a, s)) {
3230         parts_default_nan(&p, s);
3231     }
3232     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3233 }
3234
3235 int8_t float16_to_int8(float16 a, float_status *s)
3236 {
3237     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3238 }
3239
3240 int16_t float16_to_int16(float16 a, float_status *s)
3241 {
3242     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3243 }
3244
3245 int32_t float16_to_int32(float16 a, float_status *s)
3246 {
3247     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3248 }
3249
3250 int64_t float16_to_int64(float16 a, float_status *s)
3251 {
3252     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3253 }
3254
3255 int16_t float32_to_int16(float32 a, float_status *s)
3256 {
3257     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3258 }
3259
3260 int32_t float32_to_int32(float32 a, float_status *s)
3261 {
3262     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3263 }
3264
3265 int64_t float32_to_int64(float32 a, float_status *s)
3266 {
3267     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3268 }
3269
3270 int16_t float64_to_int16(float64 a, float_status *s)
3271 {
3272     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3273 }
3274
3275 int32_t float64_to_int32(float64 a, float_status *s)
3276 {
3277     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3278 }
3279
3280 int64_t float64_to_int64(float64 a, float_status *s)
3281 {
3282     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3283 }
3284
3285 int32_t float128_to_int32(float128 a, float_status *s)
3286 {
3287     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3288 }
3289
3290 int64_t float128_to_int64(float128 a, float_status *s)
3291 {
3292     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3293 }
3294
3295 Int128 float128_to_int128(float128 a, float_status *s)
3296 {
3297     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3298 }
3299
3300 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3301 {
3302     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3303 }
3304
3305 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3306 {
3307     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3308 }
3309
3310 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3311 {
3312     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3313 }
3314
3315 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3316 {
3317     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3318 }
3319
3320 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3321 {
3322     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3323 }
3324
3325 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3326 {
3327     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3328 }
3329
3330 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3331 {
3332     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3333 }
3334
3335 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3336 {
3337     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3338 }
3339
3340 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3341 {
3342     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3343 }
3344
3345 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3346 {
3347     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3348 }
3349
3350 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3351 {
3352     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3353 }
3354
3355 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3356 {
3357     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3358 }
3359
3360 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3361 {
3362     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3363 }
3364
3365 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3366 {
3367     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3368 }
3369
3370 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3371 {
3372     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3373 }
3374
3375 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3376 {
3377     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3378 }
3379
3380 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3381 {
3382     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3383 }
3384
3385 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3386 {
3387     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3388 }
3389
3390 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3391 {
3392     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3393 }
3394
3395 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3396 {
3397     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3398 }
3399
3400 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3401 {
3402     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3403 }
3404
3405 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3406 {
3407     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3408 }
3409
3410 /*
3411  * Floating-point to unsigned integer conversions
3412  */
3413
3414 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3415                                 float_status *s)
3416 {
3417     FloatParts64 p;
3418
3419     float16_unpack_canonical(&p, a, s);
3420     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3421 }
3422
3423 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3424                                   float_status *s)
3425 {
3426     FloatParts64 p;
3427
3428     float16_unpack_canonical(&p, a, s);
3429     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3430 }
3431
3432 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3433                                   float_status *s)
3434 {
3435     FloatParts64 p;
3436
3437     float16_unpack_canonical(&p, a, s);
3438     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3439 }
3440
3441 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3442                                   float_status *s)
3443 {
3444     FloatParts64 p;
3445
3446     float16_unpack_canonical(&p, a, s);
3447     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3448 }
3449
3450 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3451                                   float_status *s)
3452 {
3453     FloatParts64 p;
3454
3455     float32_unpack_canonical(&p, a, s);
3456     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3457 }
3458
3459 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3460                                   float_status *s)
3461 {
3462     FloatParts64 p;
3463
3464     float32_unpack_canonical(&p, a, s);
3465     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3466 }
3467
3468 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3469                                   float_status *s)
3470 {
3471     FloatParts64 p;
3472
3473     float32_unpack_canonical(&p, a, s);
3474     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3475 }
3476
3477 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3478                                   float_status *s)
3479 {
3480     FloatParts64 p;
3481
3482     float64_unpack_canonical(&p, a, s);
3483     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3484 }
3485
3486 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3487                                   float_status *s)
3488 {
3489     FloatParts64 p;
3490
3491     float64_unpack_canonical(&p, a, s);
3492     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3493 }
3494
3495 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3496                                   float_status *s)
3497 {
3498     FloatParts64 p;
3499
3500     float64_unpack_canonical(&p, a, s);
3501     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3502 }
3503
3504 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3505                                    int scale, float_status *s)
3506 {
3507     FloatParts64 p;
3508
3509     bfloat16_unpack_canonical(&p, a, s);
3510     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3511 }
3512
3513 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3514                                    int scale, float_status *s)
3515 {
3516     FloatParts64 p;
3517
3518     bfloat16_unpack_canonical(&p, a, s);
3519     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3520 }
3521
3522 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3523                                    int scale, float_status *s)
3524 {
3525     FloatParts64 p;
3526
3527     bfloat16_unpack_canonical(&p, a, s);
3528     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3529 }
3530
3531 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3532                                           int scale, float_status *s)
3533 {
3534     FloatParts128 p;
3535
3536     float128_unpack_canonical(&p, a, s);
3537     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3538 }
3539
3540 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3541                                           int scale, float_status *s)
3542 {
3543     FloatParts128 p;
3544
3545     float128_unpack_canonical(&p, a, s);
3546     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3547 }
3548
3549 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3550                                          int scale, float_status *s)
3551 {
3552     int flags = 0;
3553     Int128 r;
3554     FloatParts128 p;
3555
3556     float128_unpack_canonical(&p, a, s);
3557
3558     switch (p.cls) {
3559     case float_class_snan:
3560         flags |= float_flag_invalid_snan;
3561         /* fall through */
3562     case float_class_qnan:
3563         flags |= float_flag_invalid;
3564         r = UINT128_MAX;
3565         break;
3566
3567     case float_class_inf:
3568         flags = float_flag_invalid | float_flag_invalid_cvti;
3569         r = p.sign ? int128_zero() : UINT128_MAX;
3570         break;
3571
3572     case float_class_zero:
3573         return int128_zero();
3574
3575     case float_class_normal:
3576         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3577             flags = float_flag_inexact;
3578             if (p.cls == float_class_zero) {
3579                 r = int128_zero();
3580                 break;
3581             }
3582         }
3583
3584         if (p.sign) {
3585             flags = float_flag_invalid | float_flag_invalid_cvti;
3586             r = int128_zero();
3587         } else if (p.exp <= 127) {
3588             int shift = 127 - p.exp;
3589             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3590         } else {
3591             flags = float_flag_invalid | float_flag_invalid_cvti;
3592             r = UINT128_MAX;
3593         }
3594         break;
3595
3596     default:
3597         g_assert_not_reached();
3598     }
3599
3600     float_raise(flags, s);
3601     return r;
3602 }
3603
3604 uint8_t float16_to_uint8(float16 a, float_status *s)
3605 {
3606     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3607 }
3608
3609 uint16_t float16_to_uint16(float16 a, float_status *s)
3610 {
3611     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3612 }
3613
3614 uint32_t float16_to_uint32(float16 a, float_status *s)
3615 {
3616     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3617 }
3618
3619 uint64_t float16_to_uint64(float16 a, float_status *s)
3620 {
3621     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3622 }
3623
3624 uint16_t float32_to_uint16(float32 a, float_status *s)
3625 {
3626     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3627 }
3628
3629 uint32_t float32_to_uint32(float32 a, float_status *s)
3630 {
3631     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3632 }
3633
3634 uint64_t float32_to_uint64(float32 a, float_status *s)
3635 {
3636     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3637 }
3638
3639 uint16_t float64_to_uint16(float64 a, float_status *s)
3640 {
3641     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3642 }
3643
3644 uint32_t float64_to_uint32(float64 a, float_status *s)
3645 {
3646     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3647 }
3648
3649 uint64_t float64_to_uint64(float64 a, float_status *s)
3650 {
3651     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3652 }
3653
3654 uint32_t float128_to_uint32(float128 a, float_status *s)
3655 {
3656     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3657 }
3658
3659 uint64_t float128_to_uint64(float128 a, float_status *s)
3660 {
3661     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3662 }
3663
3664 Int128 float128_to_uint128(float128 a, float_status *s)
3665 {
3666     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3667 }
3668
3669 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3670 {
3671     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3672 }
3673
3674 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3675 {
3676     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3677 }
3678
3679 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3680 {
3681     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3682 }
3683
3684 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3685 {
3686     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3687 }
3688
3689 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3690 {
3691     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3692 }
3693
3694 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3695 {
3696     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3697 }
3698
3699 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3700 {
3701     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3702 }
3703
3704 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3705 {
3706     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3707 }
3708
3709 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3710 {
3711     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3712 }
3713
3714 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3715 {
3716     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3717 }
3718
3719 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3720 {
3721     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3722 }
3723
3724 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3725 {
3726     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3727 }
3728
3729 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3730 {
3731     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3732 }
3733
3734 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3735 {
3736     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3737 }
3738
3739 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3740 {
3741     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3742 }
3743
3744 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3745 {
3746     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3747 }
3748
3749 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3750 {
3751     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3752 }
3753
3754 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3755 {
3756     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3757 }
3758
3759 /*
3760  * Signed integer to floating-point conversions
3761  */
3762
3763 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3764 {
3765     FloatParts64 p;
3766
3767     parts_sint_to_float(&p, a, scale, status);
3768     return float16_round_pack_canonical(&p, status);
3769 }
3770
3771 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3772 {
3773     return int64_to_float16_scalbn(a, scale, status);
3774 }
3775
3776 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3777 {
3778     return int64_to_float16_scalbn(a, scale, status);
3779 }
3780
3781 float16 int64_to_float16(int64_t a, float_status *status)
3782 {
3783     return int64_to_float16_scalbn(a, 0, status);
3784 }
3785
3786 float16 int32_to_float16(int32_t a, float_status *status)
3787 {
3788     return int64_to_float16_scalbn(a, 0, status);
3789 }
3790
3791 float16 int16_to_float16(int16_t a, float_status *status)
3792 {
3793     return int64_to_float16_scalbn(a, 0, status);
3794 }
3795
3796 float16 int8_to_float16(int8_t a, float_status *status)
3797 {
3798     return int64_to_float16_scalbn(a, 0, status);
3799 }
3800
3801 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3802 {
3803     FloatParts64 p;
3804
3805     /* Without scaling, there are no overflow concerns. */
3806     if (likely(scale == 0) && can_use_fpu(status)) {
3807         union_float32 ur;
3808         ur.h = a;
3809         return ur.s;
3810     }
3811
3812     parts64_sint_to_float(&p, a, scale, status);
3813     return float32_round_pack_canonical(&p, status);
3814 }
3815
3816 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3817 {
3818     return int64_to_float32_scalbn(a, scale, status);
3819 }
3820
3821 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3822 {
3823     return int64_to_float32_scalbn(a, scale, status);
3824 }
3825
3826 float32 int64_to_float32(int64_t a, float_status *status)
3827 {
3828     return int64_to_float32_scalbn(a, 0, status);
3829 }
3830
3831 float32 int32_to_float32(int32_t a, float_status *status)
3832 {
3833     return int64_to_float32_scalbn(a, 0, status);
3834 }
3835
3836 float32 int16_to_float32(int16_t a, float_status *status)
3837 {
3838     return int64_to_float32_scalbn(a, 0, status);
3839 }
3840
3841 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3842 {
3843     FloatParts64 p;
3844
3845     /* Without scaling, there are no overflow concerns. */
3846     if (likely(scale == 0) && can_use_fpu(status)) {
3847         union_float64 ur;
3848         ur.h = a;
3849         return ur.s;
3850     }
3851
3852     parts_sint_to_float(&p, a, scale, status);
3853     return float64_round_pack_canonical(&p, status);
3854 }
3855
3856 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3857 {
3858     return int64_to_float64_scalbn(a, scale, status);
3859 }
3860
3861 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3862 {
3863     return int64_to_float64_scalbn(a, scale, status);
3864 }
3865
3866 float64 int64_to_float64(int64_t a, float_status *status)
3867 {
3868     return int64_to_float64_scalbn(a, 0, status);
3869 }
3870
3871 float64 int32_to_float64(int32_t a, float_status *status)
3872 {
3873     return int64_to_float64_scalbn(a, 0, status);
3874 }
3875
3876 float64 int16_to_float64(int16_t a, float_status *status)
3877 {
3878     return int64_to_float64_scalbn(a, 0, status);
3879 }
3880
3881 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3882 {
3883     FloatParts64 p;
3884
3885     parts_sint_to_float(&p, a, scale, status);
3886     return bfloat16_round_pack_canonical(&p, status);
3887 }
3888
3889 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3890 {
3891     return int64_to_bfloat16_scalbn(a, scale, status);
3892 }
3893
3894 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3895 {
3896     return int64_to_bfloat16_scalbn(a, scale, status);
3897 }
3898
3899 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3900 {
3901     return int64_to_bfloat16_scalbn(a, 0, status);
3902 }
3903
3904 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3905 {
3906     return int64_to_bfloat16_scalbn(a, 0, status);
3907 }
3908
3909 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3910 {
3911     return int64_to_bfloat16_scalbn(a, 0, status);
3912 }
3913
3914 float128 int128_to_float128(Int128 a, float_status *status)
3915 {
3916     FloatParts128 p = { };
3917     int shift;
3918
3919     if (int128_nz(a)) {
3920         p.cls = float_class_normal;
3921         if (!int128_nonneg(a)) {
3922             p.sign = true;
3923             a = int128_neg(a);
3924         }
3925
3926         shift = clz64(int128_gethi(a));
3927         if (shift == 64) {
3928             shift += clz64(int128_getlo(a));
3929         }
3930
3931         p.exp = 127 - shift;
3932         a = int128_lshift(a, shift);
3933
3934         p.frac_hi = int128_gethi(a);
3935         p.frac_lo = int128_getlo(a);
3936     } else {
3937         p.cls = float_class_zero;
3938     }
3939
3940     return float128_round_pack_canonical(&p, status);
3941 }
3942
3943 float128 int64_to_float128(int64_t a, float_status *status)
3944 {
3945     FloatParts128 p;
3946
3947     parts_sint_to_float(&p, a, 0, status);
3948     return float128_round_pack_canonical(&p, status);
3949 }
3950
3951 float128 int32_to_float128(int32_t a, float_status *status)
3952 {
3953     return int64_to_float128(a, status);
3954 }
3955
3956 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3957 {
3958     FloatParts128 p;
3959
3960     parts_sint_to_float(&p, a, 0, status);
3961     return floatx80_round_pack_canonical(&p, status);
3962 }
3963
3964 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3965 {
3966     return int64_to_floatx80(a, status);
3967 }
3968
3969 /*
3970  * Unsigned Integer to floating-point conversions
3971  */
3972
3973 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3974 {
3975     FloatParts64 p;
3976
3977     parts_uint_to_float(&p, a, scale, status);
3978     return float16_round_pack_canonical(&p, status);
3979 }
3980
3981 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3982 {
3983     return uint64_to_float16_scalbn(a, scale, status);
3984 }
3985
3986 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3987 {
3988     return uint64_to_float16_scalbn(a, scale, status);
3989 }
3990
3991 float16 uint64_to_float16(uint64_t a, float_status *status)
3992 {
3993     return uint64_to_float16_scalbn(a, 0, status);
3994 }
3995
3996 float16 uint32_to_float16(uint32_t a, float_status *status)
3997 {
3998     return uint64_to_float16_scalbn(a, 0, status);
3999 }
4000
4001 float16 uint16_to_float16(uint16_t a, float_status *status)
4002 {
4003     return uint64_to_float16_scalbn(a, 0, status);
4004 }
4005
4006 float16 uint8_to_float16(uint8_t a, float_status *status)
4007 {
4008     return uint64_to_float16_scalbn(a, 0, status);
4009 }
4010
4011 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4012 {
4013     FloatParts64 p;
4014
4015     /* Without scaling, there are no overflow concerns. */
4016     if (likely(scale == 0) && can_use_fpu(status)) {
4017         union_float32 ur;
4018         ur.h = a;
4019         return ur.s;
4020     }
4021
4022     parts_uint_to_float(&p, a, scale, status);
4023     return float32_round_pack_canonical(&p, status);
4024 }
4025
4026 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4027 {
4028     return uint64_to_float32_scalbn(a, scale, status);
4029 }
4030
4031 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4032 {
4033     return uint64_to_float32_scalbn(a, scale, status);
4034 }
4035
4036 float32 uint64_to_float32(uint64_t a, float_status *status)
4037 {
4038     return uint64_to_float32_scalbn(a, 0, status);
4039 }
4040
4041 float32 uint32_to_float32(uint32_t a, float_status *status)
4042 {
4043     return uint64_to_float32_scalbn(a, 0, status);
4044 }
4045
4046 float32 uint16_to_float32(uint16_t a, float_status *status)
4047 {
4048     return uint64_to_float32_scalbn(a, 0, status);
4049 }
4050
4051 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4052 {
4053     FloatParts64 p;
4054
4055     /* Without scaling, there are no overflow concerns. */
4056     if (likely(scale == 0) && can_use_fpu(status)) {
4057         union_float64 ur;
4058         ur.h = a;
4059         return ur.s;
4060     }
4061
4062     parts_uint_to_float(&p, a, scale, status);
4063     return float64_round_pack_canonical(&p, status);
4064 }
4065
4066 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4067 {
4068     return uint64_to_float64_scalbn(a, scale, status);
4069 }
4070
4071 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4072 {
4073     return uint64_to_float64_scalbn(a, scale, status);
4074 }
4075
4076 float64 uint64_to_float64(uint64_t a, float_status *status)
4077 {
4078     return uint64_to_float64_scalbn(a, 0, status);
4079 }
4080
4081 float64 uint32_to_float64(uint32_t a, float_status *status)
4082 {
4083     return uint64_to_float64_scalbn(a, 0, status);
4084 }
4085
4086 float64 uint16_to_float64(uint16_t a, float_status *status)
4087 {
4088     return uint64_to_float64_scalbn(a, 0, status);
4089 }
4090
4091 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4092 {
4093     FloatParts64 p;
4094
4095     parts_uint_to_float(&p, a, scale, status);
4096     return bfloat16_round_pack_canonical(&p, status);
4097 }
4098
4099 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4100 {
4101     return uint64_to_bfloat16_scalbn(a, scale, status);
4102 }
4103
4104 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4105 {
4106     return uint64_to_bfloat16_scalbn(a, scale, status);
4107 }
4108
4109 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4110 {
4111     return uint64_to_bfloat16_scalbn(a, 0, status);
4112 }
4113
4114 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4115 {
4116     return uint64_to_bfloat16_scalbn(a, 0, status);
4117 }
4118
4119 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4120 {
4121     return uint64_to_bfloat16_scalbn(a, 0, status);
4122 }
4123
4124 float128 uint64_to_float128(uint64_t a, float_status *status)
4125 {
4126     FloatParts128 p;
4127
4128     parts_uint_to_float(&p, a, 0, status);
4129     return float128_round_pack_canonical(&p, status);
4130 }
4131
4132 float128 uint128_to_float128(Int128 a, float_status *status)
4133 {
4134     FloatParts128 p = { };
4135     int shift;
4136
4137     if (int128_nz(a)) {
4138         p.cls = float_class_normal;
4139
4140         shift = clz64(int128_gethi(a));
4141         if (shift == 64) {
4142             shift += clz64(int128_getlo(a));
4143         }
4144
4145         p.exp = 127 - shift;
4146         a = int128_lshift(a, shift);
4147
4148         p.frac_hi = int128_gethi(a);
4149         p.frac_lo = int128_getlo(a);
4150     } else {
4151         p.cls = float_class_zero;
4152     }
4153
4154     return float128_round_pack_canonical(&p, status);
4155 }
4156
4157 /*
4158  * Minimum and maximum
4159  */
4160
4161 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4162 {
4163     FloatParts64 pa, pb, *pr;
4164
4165     float16_unpack_canonical(&pa, a, s);
4166     float16_unpack_canonical(&pb, b, s);
4167     pr = parts_minmax(&pa, &pb, s, flags);
4168
4169     return float16_round_pack_canonical(pr, s);
4170 }
4171
4172 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4173                                 float_status *s, int flags)
4174 {
4175     FloatParts64 pa, pb, *pr;
4176
4177     bfloat16_unpack_canonical(&pa, a, s);
4178     bfloat16_unpack_canonical(&pb, b, s);
4179     pr = parts_minmax(&pa, &pb, s, flags);
4180
4181     return bfloat16_round_pack_canonical(pr, s);
4182 }
4183
4184 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4185 {
4186     FloatParts64 pa, pb, *pr;
4187
4188     float32_unpack_canonical(&pa, a, s);
4189     float32_unpack_canonical(&pb, b, s);
4190     pr = parts_minmax(&pa, &pb, s, flags);
4191
4192     return float32_round_pack_canonical(pr, s);
4193 }
4194
4195 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4196 {
4197     FloatParts64 pa, pb, *pr;
4198
4199     float64_unpack_canonical(&pa, a, s);
4200     float64_unpack_canonical(&pb, b, s);
4201     pr = parts_minmax(&pa, &pb, s, flags);
4202
4203     return float64_round_pack_canonical(pr, s);
4204 }
4205
4206 static float128 float128_minmax(float128 a, float128 b,
4207                                 float_status *s, int flags)
4208 {
4209     FloatParts128 pa, pb, *pr;
4210
4211     float128_unpack_canonical(&pa, a, s);
4212     float128_unpack_canonical(&pb, b, s);
4213     pr = parts_minmax(&pa, &pb, s, flags);
4214
4215     return float128_round_pack_canonical(pr, s);
4216 }
4217
4218 #define MINMAX_1(type, name, flags) \
4219     type type##_##name(type a, type b, float_status *s) \
4220     { return type##_minmax(a, b, s, flags); }
4221
4222 #define MINMAX_2(type) \
4223     MINMAX_1(type, max, 0)                                                \
4224     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4225     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4226     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4227     MINMAX_1(type, min, minmax_ismin)                                     \
4228     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4229     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4230     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4231
4232 MINMAX_2(float16)
4233 MINMAX_2(bfloat16)
4234 MINMAX_2(float32)
4235 MINMAX_2(float64)
4236 MINMAX_2(float128)
4237
4238 #undef MINMAX_1
4239 #undef MINMAX_2
4240
4241 /*
4242  * Floating point compare
4243  */
4244
4245 static FloatRelation QEMU_FLATTEN
4246 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4247 {
4248     FloatParts64 pa, pb;
4249
4250     float16_unpack_canonical(&pa, a, s);
4251     float16_unpack_canonical(&pb, b, s);
4252     return parts_compare(&pa, &pb, s, is_quiet);
4253 }
4254
4255 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4256 {
4257     return float16_do_compare(a, b, s, false);
4258 }
4259
4260 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4261 {
4262     return float16_do_compare(a, b, s, true);
4263 }
4264
4265 static FloatRelation QEMU_SOFTFLOAT_ATTR
4266 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4267 {
4268     FloatParts64 pa, pb;
4269
4270     float32_unpack_canonical(&pa, a, s);
4271     float32_unpack_canonical(&pb, b, s);
4272     return parts_compare(&pa, &pb, s, is_quiet);
4273 }
4274
4275 static FloatRelation QEMU_FLATTEN
4276 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4277 {
4278     union_float32 ua, ub;
4279
4280     ua.s = xa;
4281     ub.s = xb;
4282
4283     if (QEMU_NO_HARDFLOAT) {
4284         goto soft;
4285     }
4286
4287     float32_input_flush2(&ua.s, &ub.s, s);
4288     if (isgreaterequal(ua.h, ub.h)) {
4289         if (isgreater(ua.h, ub.h)) {
4290             return float_relation_greater;
4291         }
4292         return float_relation_equal;
4293     }
4294     if (likely(isless(ua.h, ub.h))) {
4295         return float_relation_less;
4296     }
4297     /*
4298      * The only condition remaining is unordered.
4299      * Fall through to set flags.
4300      */
4301  soft:
4302     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4303 }
4304
4305 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4306 {
4307     return float32_hs_compare(a, b, s, false);
4308 }
4309
4310 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4311 {
4312     return float32_hs_compare(a, b, s, true);
4313 }
4314
4315 static FloatRelation QEMU_SOFTFLOAT_ATTR
4316 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4317 {
4318     FloatParts64 pa, pb;
4319
4320     float64_unpack_canonical(&pa, a, s);
4321     float64_unpack_canonical(&pb, b, s);
4322     return parts_compare(&pa, &pb, s, is_quiet);
4323 }
4324
4325 static FloatRelation QEMU_FLATTEN
4326 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4327 {
4328     union_float64 ua, ub;
4329
4330     ua.s = xa;
4331     ub.s = xb;
4332
4333     if (QEMU_NO_HARDFLOAT) {
4334         goto soft;
4335     }
4336
4337     float64_input_flush2(&ua.s, &ub.s, s);
4338     if (isgreaterequal(ua.h, ub.h)) {
4339         if (isgreater(ua.h, ub.h)) {
4340             return float_relation_greater;
4341         }
4342         return float_relation_equal;
4343     }
4344     if (likely(isless(ua.h, ub.h))) {
4345         return float_relation_less;
4346     }
4347     /*
4348      * The only condition remaining is unordered.
4349      * Fall through to set flags.
4350      */
4351  soft:
4352     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4353 }
4354
4355 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4356 {
4357     return float64_hs_compare(a, b, s, false);
4358 }
4359
4360 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4361 {
4362     return float64_hs_compare(a, b, s, true);
4363 }
4364
4365 static FloatRelation QEMU_FLATTEN
4366 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4367 {
4368     FloatParts64 pa, pb;
4369
4370     bfloat16_unpack_canonical(&pa, a, s);
4371     bfloat16_unpack_canonical(&pb, b, s);
4372     return parts_compare(&pa, &pb, s, is_quiet);
4373 }
4374
4375 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4376 {
4377     return bfloat16_do_compare(a, b, s, false);
4378 }
4379
4380 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4381 {
4382     return bfloat16_do_compare(a, b, s, true);
4383 }
4384
4385 static FloatRelation QEMU_FLATTEN
4386 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4387 {
4388     FloatParts128 pa, pb;
4389
4390     float128_unpack_canonical(&pa, a, s);
4391     float128_unpack_canonical(&pb, b, s);
4392     return parts_compare(&pa, &pb, s, is_quiet);
4393 }
4394
4395 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4396 {
4397     return float128_do_compare(a, b, s, false);
4398 }
4399
4400 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4401 {
4402     return float128_do_compare(a, b, s, true);
4403 }
4404
4405 static FloatRelation QEMU_FLATTEN
4406 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4407 {
4408     FloatParts128 pa, pb;
4409
4410     if (!floatx80_unpack_canonical(&pa, a, s) ||
4411         !floatx80_unpack_canonical(&pb, b, s)) {
4412         return float_relation_unordered;
4413     }
4414     return parts_compare(&pa, &pb, s, is_quiet);
4415 }
4416
4417 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4418 {
4419     return floatx80_do_compare(a, b, s, false);
4420 }
4421
4422 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4423 {
4424     return floatx80_do_compare(a, b, s, true);
4425 }
4426
4427 /*
4428  * Scale by 2**N
4429  */
4430
4431 float16 float16_scalbn(float16 a, int n, float_status *status)
4432 {
4433     FloatParts64 p;
4434
4435     float16_unpack_canonical(&p, a, status);
4436     parts_scalbn(&p, n, status);
4437     return float16_round_pack_canonical(&p, status);
4438 }
4439
4440 float32 float32_scalbn(float32 a, int n, float_status *status)
4441 {
4442     FloatParts64 p;
4443
4444     float32_unpack_canonical(&p, a, status);
4445     parts_scalbn(&p, n, status);
4446     return float32_round_pack_canonical(&p, status);
4447 }
4448
4449 float64 float64_scalbn(float64 a, int n, float_status *status)
4450 {
4451     FloatParts64 p;
4452
4453     float64_unpack_canonical(&p, a, status);
4454     parts_scalbn(&p, n, status);
4455     return float64_round_pack_canonical(&p, status);
4456 }
4457
4458 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4459 {
4460     FloatParts64 p;
4461
4462     bfloat16_unpack_canonical(&p, a, status);
4463     parts_scalbn(&p, n, status);
4464     return bfloat16_round_pack_canonical(&p, status);
4465 }
4466
4467 float128 float128_scalbn(float128 a, int n, float_status *status)
4468 {
4469     FloatParts128 p;
4470
4471     float128_unpack_canonical(&p, a, status);
4472     parts_scalbn(&p, n, status);
4473     return float128_round_pack_canonical(&p, status);
4474 }
4475
4476 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4477 {
4478     FloatParts128 p;
4479
4480     if (!floatx80_unpack_canonical(&p, a, status)) {
4481         return floatx80_default_nan(status);
4482     }
4483     parts_scalbn(&p, n, status);
4484     return floatx80_round_pack_canonical(&p, status);
4485 }
4486
4487 /*
4488  * Square Root
4489  */
4490
4491 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4492 {
4493     FloatParts64 p;
4494
4495     float16_unpack_canonical(&p, a, status);
4496     parts_sqrt(&p, status, &float16_params);
4497     return float16_round_pack_canonical(&p, status);
4498 }
4499
4500 static float32 QEMU_SOFTFLOAT_ATTR
4501 soft_f32_sqrt(float32 a, float_status *status)
4502 {
4503     FloatParts64 p;
4504
4505     float32_unpack_canonical(&p, a, status);
4506     parts_sqrt(&p, status, &float32_params);
4507     return float32_round_pack_canonical(&p, status);
4508 }
4509
4510 static float64 QEMU_SOFTFLOAT_ATTR
4511 soft_f64_sqrt(float64 a, float_status *status)
4512 {
4513     FloatParts64 p;
4514
4515     float64_unpack_canonical(&p, a, status);
4516     parts_sqrt(&p, status, &float64_params);
4517     return float64_round_pack_canonical(&p, status);
4518 }
4519
4520 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4521 {
4522     union_float32 ua, ur;
4523
4524     ua.s = xa;
4525     if (unlikely(!can_use_fpu(s))) {
4526         goto soft;
4527     }
4528
4529     float32_input_flush1(&ua.s, s);
4530     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4531         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4532                        fpclassify(ua.h) == FP_ZERO) ||
4533                      signbit(ua.h))) {
4534             goto soft;
4535         }
4536     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4537                         float32_is_neg(ua.s))) {
4538         goto soft;
4539     }
4540     ur.h = sqrtf(ua.h);
4541     return ur.s;
4542
4543  soft:
4544     return soft_f32_sqrt(ua.s, s);
4545 }
4546
4547 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4548 {
4549     union_float64 ua, ur;
4550
4551     ua.s = xa;
4552     if (unlikely(!can_use_fpu(s))) {
4553         goto soft;
4554     }
4555
4556     float64_input_flush1(&ua.s, s);
4557     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4558         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4559                        fpclassify(ua.h) == FP_ZERO) ||
4560                      signbit(ua.h))) {
4561             goto soft;
4562         }
4563     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4564                         float64_is_neg(ua.s))) {
4565         goto soft;
4566     }
4567     ur.h = sqrt(ua.h);
4568     return ur.s;
4569
4570  soft:
4571     return soft_f64_sqrt(ua.s, s);
4572 }
4573
4574 float64 float64r32_sqrt(float64 a, float_status *status)
4575 {
4576     FloatParts64 p;
4577
4578     float64_unpack_canonical(&p, a, status);
4579     parts_sqrt(&p, status, &float64_params);
4580     return float64r32_round_pack_canonical(&p, status);
4581 }
4582
4583 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4584 {
4585     FloatParts64 p;
4586
4587     bfloat16_unpack_canonical(&p, a, status);
4588     parts_sqrt(&p, status, &bfloat16_params);
4589     return bfloat16_round_pack_canonical(&p, status);
4590 }
4591
4592 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4593 {
4594     FloatParts128 p;
4595
4596     float128_unpack_canonical(&p, a, status);
4597     parts_sqrt(&p, status, &float128_params);
4598     return float128_round_pack_canonical(&p, status);
4599 }
4600
4601 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4602 {
4603     FloatParts128 p;
4604
4605     if (!floatx80_unpack_canonical(&p, a, s)) {
4606         return floatx80_default_nan(s);
4607     }
4608     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4609     return floatx80_round_pack_canonical(&p, s);
4610 }
4611
4612 /*
4613  * log2
4614  */
4615 float32 float32_log2(float32 a, float_status *status)
4616 {
4617     FloatParts64 p;
4618
4619     float32_unpack_canonical(&p, a, status);
4620     parts_log2(&p, status, &float32_params);
4621     return float32_round_pack_canonical(&p, status);
4622 }
4623
4624 float64 float64_log2(float64 a, float_status *status)
4625 {
4626     FloatParts64 p;
4627
4628     float64_unpack_canonical(&p, a, status);
4629     parts_log2(&p, status, &float64_params);
4630     return float64_round_pack_canonical(&p, status);
4631 }
4632
4633 /*----------------------------------------------------------------------------
4634 | The pattern for a default generated NaN.
4635 *----------------------------------------------------------------------------*/
4636
4637 float16 float16_default_nan(float_status *status)
4638 {
4639     FloatParts64 p;
4640
4641     parts_default_nan(&p, status);
4642     p.frac >>= float16_params.frac_shift;
4643     return float16_pack_raw(&p);
4644 }
4645
4646 float32 float32_default_nan(float_status *status)
4647 {
4648     FloatParts64 p;
4649
4650     parts_default_nan(&p, status);
4651     p.frac >>= float32_params.frac_shift;
4652     return float32_pack_raw(&p);
4653 }
4654
4655 float64 float64_default_nan(float_status *status)
4656 {
4657     FloatParts64 p;
4658
4659     parts_default_nan(&p, status);
4660     p.frac >>= float64_params.frac_shift;
4661     return float64_pack_raw(&p);
4662 }
4663
4664 float128 float128_default_nan(float_status *status)
4665 {
4666     FloatParts128 p;
4667
4668     parts_default_nan(&p, status);
4669     frac_shr(&p, float128_params.frac_shift);
4670     return float128_pack_raw(&p);
4671 }
4672
4673 bfloat16 bfloat16_default_nan(float_status *status)
4674 {
4675     FloatParts64 p;
4676
4677     parts_default_nan(&p, status);
4678     p.frac >>= bfloat16_params.frac_shift;
4679     return bfloat16_pack_raw(&p);
4680 }
4681
4682 /*----------------------------------------------------------------------------
4683 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4684 *----------------------------------------------------------------------------*/
4685
4686 float16 float16_silence_nan(float16 a, float_status *status)
4687 {
4688     FloatParts64 p;
4689
4690     float16_unpack_raw(&p, a);
4691     p.frac <<= float16_params.frac_shift;
4692     parts_silence_nan(&p, status);
4693     p.frac >>= float16_params.frac_shift;
4694     return float16_pack_raw(&p);
4695 }
4696
4697 float32 float32_silence_nan(float32 a, float_status *status)
4698 {
4699     FloatParts64 p;
4700
4701     float32_unpack_raw(&p, a);
4702     p.frac <<= float32_params.frac_shift;
4703     parts_silence_nan(&p, status);
4704     p.frac >>= float32_params.frac_shift;
4705     return float32_pack_raw(&p);
4706 }
4707
4708 float64 float64_silence_nan(float64 a, float_status *status)
4709 {
4710     FloatParts64 p;
4711
4712     float64_unpack_raw(&p, a);
4713     p.frac <<= float64_params.frac_shift;
4714     parts_silence_nan(&p, status);
4715     p.frac >>= float64_params.frac_shift;
4716     return float64_pack_raw(&p);
4717 }
4718
4719 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4720 {
4721     FloatParts64 p;
4722
4723     bfloat16_unpack_raw(&p, a);
4724     p.frac <<= bfloat16_params.frac_shift;
4725     parts_silence_nan(&p, status);
4726     p.frac >>= bfloat16_params.frac_shift;
4727     return bfloat16_pack_raw(&p);
4728 }
4729
4730 float128 float128_silence_nan(float128 a, float_status *status)
4731 {
4732     FloatParts128 p;
4733
4734     float128_unpack_raw(&p, a);
4735     frac_shl(&p, float128_params.frac_shift);
4736     parts_silence_nan(&p, status);
4737     frac_shr(&p, float128_params.frac_shift);
4738     return float128_pack_raw(&p);
4739 }
4740
4741 /*----------------------------------------------------------------------------
4742 | If `a' is denormal and we are in flush-to-zero mode then set the
4743 | input-denormal exception and return zero. Otherwise just return the value.
4744 *----------------------------------------------------------------------------*/
4745
4746 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4747 {
4748     if (p.exp == 0 && p.frac != 0) {
4749         float_raise(float_flag_input_denormal, status);
4750         return true;
4751     }
4752
4753     return false;
4754 }
4755
4756 float16 float16_squash_input_denormal(float16 a, float_status *status)
4757 {
4758     if (status->flush_inputs_to_zero) {
4759         FloatParts64 p;
4760
4761         float16_unpack_raw(&p, a);
4762         if (parts_squash_denormal(p, status)) {
4763             return float16_set_sign(float16_zero, p.sign);
4764         }
4765     }
4766     return a;
4767 }
4768
4769 float32 float32_squash_input_denormal(float32 a, float_status *status)
4770 {
4771     if (status->flush_inputs_to_zero) {
4772         FloatParts64 p;
4773
4774         float32_unpack_raw(&p, a);
4775         if (parts_squash_denormal(p, status)) {
4776             return float32_set_sign(float32_zero, p.sign);
4777         }
4778     }
4779     return a;
4780 }
4781
4782 float64 float64_squash_input_denormal(float64 a, float_status *status)
4783 {
4784     if (status->flush_inputs_to_zero) {
4785         FloatParts64 p;
4786
4787         float64_unpack_raw(&p, a);
4788         if (parts_squash_denormal(p, status)) {
4789             return float64_set_sign(float64_zero, p.sign);
4790         }
4791     }
4792     return a;
4793 }
4794
4795 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4796 {
4797     if (status->flush_inputs_to_zero) {
4798         FloatParts64 p;
4799
4800         bfloat16_unpack_raw(&p, a);
4801         if (parts_squash_denormal(p, status)) {
4802             return bfloat16_set_sign(bfloat16_zero, p.sign);
4803         }
4804     }
4805     return a;
4806 }
4807
4808 /*----------------------------------------------------------------------------
4809 | Normalizes the subnormal extended double-precision floating-point value
4810 | represented by the denormalized significand `aSig'.  The normalized exponent
4811 | and significand are stored at the locations pointed to by `zExpPtr' and
4812 | `zSigPtr', respectively.
4813 *----------------------------------------------------------------------------*/
4814
4815 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4816                                 uint64_t *zSigPtr)
4817 {
4818     int8_t shiftCount;
4819
4820     shiftCount = clz64(aSig);
4821     *zSigPtr = aSig<<shiftCount;
4822     *zExpPtr = 1 - shiftCount;
4823 }
4824
4825 /*----------------------------------------------------------------------------
4826 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4827 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4828 | and returns the proper extended double-precision floating-point value
4829 | corresponding to the abstract input.  Ordinarily, the abstract value is
4830 | rounded and packed into the extended double-precision format, with the
4831 | inexact exception raised if the abstract input cannot be represented
4832 | exactly.  However, if the abstract value is too large, the overflow and
4833 | inexact exceptions are raised and an infinity or maximal finite value is
4834 | returned.  If the abstract value is too small, the input value is rounded to
4835 | a subnormal number, and the underflow and inexact exceptions are raised if
4836 | the abstract input cannot be represented exactly as a subnormal extended
4837 | double-precision floating-point number.
4838 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4839 | the result is rounded to the same number of bits as single or double
4840 | precision, respectively.  Otherwise, the result is rounded to the full
4841 | precision of the extended double-precision format.
4842 |     The input significand must be normalized or smaller.  If the input
4843 | significand is not normalized, `zExp' must be 0; in that case, the result
4844 | returned is a subnormal number, and it must not require rounding.  The
4845 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4846 | Floating-Point Arithmetic.
4847 *----------------------------------------------------------------------------*/
4848
4849 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4850                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4851                               float_status *status)
4852 {
4853     FloatRoundMode roundingMode;
4854     bool roundNearestEven, increment, isTiny;
4855     int64_t roundIncrement, roundMask, roundBits;
4856
4857     roundingMode = status->float_rounding_mode;
4858     roundNearestEven = ( roundingMode == float_round_nearest_even );
4859     switch (roundingPrecision) {
4860     case floatx80_precision_x:
4861         goto precision80;
4862     case floatx80_precision_d:
4863         roundIncrement = UINT64_C(0x0000000000000400);
4864         roundMask = UINT64_C(0x00000000000007FF);
4865         break;
4866     case floatx80_precision_s:
4867         roundIncrement = UINT64_C(0x0000008000000000);
4868         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4869         break;
4870     default:
4871         g_assert_not_reached();
4872     }
4873     zSig0 |= ( zSig1 != 0 );
4874     switch (roundingMode) {
4875     case float_round_nearest_even:
4876     case float_round_ties_away:
4877         break;
4878     case float_round_to_zero:
4879         roundIncrement = 0;
4880         break;
4881     case float_round_up:
4882         roundIncrement = zSign ? 0 : roundMask;
4883         break;
4884     case float_round_down:
4885         roundIncrement = zSign ? roundMask : 0;
4886         break;
4887     default:
4888         abort();
4889     }
4890     roundBits = zSig0 & roundMask;
4891     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4892         if (    ( 0x7FFE < zExp )
4893              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4894            ) {
4895             goto overflow;
4896         }
4897         if ( zExp <= 0 ) {
4898             if (status->flush_to_zero) {
4899                 float_raise(float_flag_output_denormal, status);
4900                 return packFloatx80(zSign, 0, 0);
4901             }
4902             isTiny = status->tininess_before_rounding
4903                   || (zExp < 0 )
4904                   || (zSig0 <= zSig0 + roundIncrement);
4905             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4906             zExp = 0;
4907             roundBits = zSig0 & roundMask;
4908             if (isTiny && roundBits) {
4909                 float_raise(float_flag_underflow, status);
4910             }
4911             if (roundBits) {
4912                 float_raise(float_flag_inexact, status);
4913             }
4914             zSig0 += roundIncrement;
4915             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4916             roundIncrement = roundMask + 1;
4917             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4918                 roundMask |= roundIncrement;
4919             }
4920             zSig0 &= ~ roundMask;
4921             return packFloatx80( zSign, zExp, zSig0 );
4922         }
4923     }
4924     if (roundBits) {
4925         float_raise(float_flag_inexact, status);
4926     }
4927     zSig0 += roundIncrement;
4928     if ( zSig0 < roundIncrement ) {
4929         ++zExp;
4930         zSig0 = UINT64_C(0x8000000000000000);
4931     }
4932     roundIncrement = roundMask + 1;
4933     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4934         roundMask |= roundIncrement;
4935     }
4936     zSig0 &= ~ roundMask;
4937     if ( zSig0 == 0 ) zExp = 0;
4938     return packFloatx80( zSign, zExp, zSig0 );
4939  precision80:
4940     switch (roundingMode) {
4941     case float_round_nearest_even:
4942     case float_round_ties_away:
4943         increment = ((int64_t)zSig1 < 0);
4944         break;
4945     case float_round_to_zero:
4946         increment = 0;
4947         break;
4948     case float_round_up:
4949         increment = !zSign && zSig1;
4950         break;
4951     case float_round_down:
4952         increment = zSign && zSig1;
4953         break;
4954     default:
4955         abort();
4956     }
4957     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4958         if (    ( 0x7FFE < zExp )
4959              || (    ( zExp == 0x7FFE )
4960                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4961                   && increment
4962                 )
4963            ) {
4964             roundMask = 0;
4965  overflow:
4966             float_raise(float_flag_overflow | float_flag_inexact, status);
4967             if (    ( roundingMode == float_round_to_zero )
4968                  || ( zSign && ( roundingMode == float_round_up ) )
4969                  || ( ! zSign && ( roundingMode == float_round_down ) )
4970                ) {
4971                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4972             }
4973             return packFloatx80(zSign,
4974                                 floatx80_infinity_high,
4975                                 floatx80_infinity_low);
4976         }
4977         if ( zExp <= 0 ) {
4978             isTiny = status->tininess_before_rounding
4979                   || (zExp < 0)
4980                   || !increment
4981                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4982             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4983             zExp = 0;
4984             if (isTiny && zSig1) {
4985                 float_raise(float_flag_underflow, status);
4986             }
4987             if (zSig1) {
4988                 float_raise(float_flag_inexact, status);
4989             }
4990             switch (roundingMode) {
4991             case float_round_nearest_even:
4992             case float_round_ties_away:
4993                 increment = ((int64_t)zSig1 < 0);
4994                 break;
4995             case float_round_to_zero:
4996                 increment = 0;
4997                 break;
4998             case float_round_up:
4999                 increment = !zSign && zSig1;
5000                 break;
5001             case float_round_down:
5002                 increment = zSign && zSig1;
5003                 break;
5004             default:
5005                 abort();
5006             }
5007             if ( increment ) {
5008                 ++zSig0;
5009                 if (!(zSig1 << 1) && roundNearestEven) {
5010                     zSig0 &= ~1;
5011                 }
5012                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5013             }
5014             return packFloatx80( zSign, zExp, zSig0 );
5015         }
5016     }
5017     if (zSig1) {
5018         float_raise(float_flag_inexact, status);
5019     }
5020     if ( increment ) {
5021         ++zSig0;
5022         if ( zSig0 == 0 ) {
5023             ++zExp;
5024             zSig0 = UINT64_C(0x8000000000000000);
5025         }
5026         else {
5027             if (!(zSig1 << 1) && roundNearestEven) {
5028                 zSig0 &= ~1;
5029             }
5030         }
5031     }
5032     else {
5033         if ( zSig0 == 0 ) zExp = 0;
5034     }
5035     return packFloatx80( zSign, zExp, zSig0 );
5036
5037 }
5038
5039 /*----------------------------------------------------------------------------
5040 | Takes an abstract floating-point value having sign `zSign', exponent
5041 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5042 | and returns the proper extended double-precision floating-point value
5043 | corresponding to the abstract input.  This routine is just like
5044 | `roundAndPackFloatx80' except that the input significand does not have to be
5045 | normalized.
5046 *----------------------------------------------------------------------------*/
5047
5048 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5049                                        bool zSign, int32_t zExp,
5050                                        uint64_t zSig0, uint64_t zSig1,
5051                                        float_status *status)
5052 {
5053     int8_t shiftCount;
5054
5055     if ( zSig0 == 0 ) {
5056         zSig0 = zSig1;
5057         zSig1 = 0;
5058         zExp -= 64;
5059     }
5060     shiftCount = clz64(zSig0);
5061     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5062     zExp -= shiftCount;
5063     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5064                                 zSig0, zSig1, status);
5065
5066 }
5067
5068 /*----------------------------------------------------------------------------
5069 | Returns the binary exponential of the single-precision floating-point value
5070 | `a'. The operation is performed according to the IEC/IEEE Standard for
5071 | Binary Floating-Point Arithmetic.
5072 |
5073 | Uses the following identities:
5074 |
5075 | 1. -------------------------------------------------------------------------
5076 |      x    x*ln(2)
5077 |     2  = e
5078 |
5079 | 2. -------------------------------------------------------------------------
5080 |                      2     3     4     5           n
5081 |      x        x     x     x     x     x           x
5082 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5083 |               1!    2!    3!    4!    5!          n!
5084 *----------------------------------------------------------------------------*/
5085
5086 static const float64 float32_exp2_coefficients[15] =
5087 {
5088     const_float64( 0x3ff0000000000000ll ), /*  1 */
5089     const_float64( 0x3fe0000000000000ll ), /*  2 */
5090     const_float64( 0x3fc5555555555555ll ), /*  3 */
5091     const_float64( 0x3fa5555555555555ll ), /*  4 */
5092     const_float64( 0x3f81111111111111ll ), /*  5 */
5093     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5094     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5095     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5096     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5097     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5098     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5099     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5100     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5101     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5102     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5103 };
5104
5105 float32 float32_exp2(float32 a, float_status *status)
5106 {
5107     FloatParts64 xp, xnp, tp, rp;
5108     int i;
5109
5110     float32_unpack_canonical(&xp, a, status);
5111     if (unlikely(xp.cls != float_class_normal)) {
5112         switch (xp.cls) {
5113         case float_class_snan:
5114         case float_class_qnan:
5115             parts_return_nan(&xp, status);
5116             return float32_round_pack_canonical(&xp, status);
5117         case float_class_inf:
5118             return xp.sign ? float32_zero : a;
5119         case float_class_zero:
5120             return float32_one;
5121         default:
5122             break;
5123         }
5124         g_assert_not_reached();
5125     }
5126
5127     float_raise(float_flag_inexact, status);
5128
5129     float64_unpack_canonical(&tp, float64_ln2, status);
5130     xp = *parts_mul(&xp, &tp, status);
5131     xnp = xp;
5132
5133     float64_unpack_canonical(&rp, float64_one, status);
5134     for (i = 0 ; i < 15 ; i++) {
5135         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5136         rp = *parts_muladd(&tp, &xp, &rp, 0, status);
5137         xnp = *parts_mul(&xnp, &xp, status);
5138     }
5139
5140     return float32_round_pack_canonical(&rp, status);
5141 }
5142
5143 /*----------------------------------------------------------------------------
5144 | Rounds the extended double-precision floating-point value `a'
5145 | to the precision provided by floatx80_rounding_precision and returns the
5146 | result as an extended double-precision floating-point value.
5147 | The operation is performed according to the IEC/IEEE Standard for Binary
5148 | Floating-Point Arithmetic.
5149 *----------------------------------------------------------------------------*/
5150
5151 floatx80 floatx80_round(floatx80 a, float_status *status)
5152 {
5153     FloatParts128 p;
5154
5155     if (!floatx80_unpack_canonical(&p, a, status)) {
5156         return floatx80_default_nan(status);
5157     }
5158     return floatx80_round_pack_canonical(&p, status);
5159 }
5160
5161 static void __attribute__((constructor)) softfloat_init(void)
5162 {
5163     union_float64 ua, ub, uc, ur;
5164
5165     if (QEMU_NO_HARDFLOAT) {
5166         return;
5167     }
5168     /*
5169      * Test that the host's FMA is not obviously broken. For example,
5170      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5171      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5172      */
5173     ua.s = 0x0020000000000001ULL;
5174     ub.s = 0x3ca0000000000000ULL;
5175     uc.s = 0x0020000000000000ULL;
5176     ur.h = fma(ua.h, ub.h, uc.h);
5177     if (ur.s != 0x0020000000000001ULL) {
5178         force_soft_fma = true;
5179     }
5180 }