fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*
 405  * Classify a floating point number. Everything above float_class_qnan
 406  * is a NaN so cls >= float_class_qnan is any NaN.
 407  */
 408
 409 typedef enum __attribute__ ((__packed__)) {
 410     float_class_unclassified,
 411     float_class_zero,
 412     float_class_normal,
 413     float_class_inf,
 414     float_class_qnan,  /* all NaNs from here */
 415     float_class_snan,
 416 } FloatClass;
 417
 418 #define float_cmask(bit)  (1u << (bit))
 419
 420 enum {
 421     float_cmask_zero    = float_cmask(float_class_zero),
 422     float_cmask_normal  = float_cmask(float_class_normal),
 423     float_cmask_inf     = float_cmask(float_class_inf),
 424     float_cmask_qnan    = float_cmask(float_class_qnan),
 425     float_cmask_snan    = float_cmask(float_class_snan),
 426
 427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 429 };
 430
 431 /* Flags for parts_minmax. */
 432 enum {
 433     /* Set for minimum; clear for maximum. */
 434     minmax_ismin = 1,
 435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
 436     minmax_isnum = 2,
 437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
 438     minmax_ismag = 4,
 439     /*
 440      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
 441      * operations.
 442      */
 443     minmax_isnumber = 8,
 444 };
 445
 446 /* Simple helpers for checking if, or what kind of, NaN we have */
 447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 448 {
 449     return unlikely(c >= float_class_qnan);
 450 }
 451
 452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 453 {
 454     return c == float_class_snan;
 455 }
 456
 457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 458 {
 459     return c == float_class_qnan;
 460 }
 461
 462 /*
 463  * Structure holding all of the decomposed parts of a float.
 464  * The exponent is unbiased and the fraction is normalized.
 465  *
 466  * The fraction words are stored in big-endian word ordering,
 467  * so that truncation from a larger format to a smaller format
 468  * can be done simply by ignoring subsequent elements.
 469  */
 470
 471 typedef struct {
 472     FloatClass cls;
 473     bool sign;
 474     int32_t exp;
 475     union {
 476         /* Routines that know the structure may reference the singular name. */
 477         uint64_t frac;
 478         /*
 479          * Routines expanded with multiple structures reference "hi" and "lo"
 480          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 481          * both the same word and aliased here.
 482          */
 483         uint64_t frac_hi;
 484         uint64_t frac_lo;
 485     };
 486 } FloatParts64;
 487
 488 typedef struct {
 489     FloatClass cls;
 490     bool sign;
 491     int32_t exp;
 492     uint64_t frac_hi;
 493     uint64_t frac_lo;
 494 } FloatParts128;
 495
 496 typedef struct {
 497     FloatClass cls;
 498     bool sign;
 499     int32_t exp;
 500     uint64_t frac_hi;
 501     uint64_t frac_hm;  /* high-middle */
 502     uint64_t frac_lm;  /* low-middle */
 503     uint64_t frac_lo;
 504 } FloatParts256;
 505
 506 /* These apply to the most significant word of each FloatPartsN. */
 507 #define DECOMPOSED_BINARY_POINT    63
 508 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 509
 510 /* Structure holding all of the relevant parameters for a format.
 511  *   exp_size: the size of the exponent field
 512  *   exp_bias: the offset applied to the exponent field
 513  *   exp_max: the maximum normalised exponent
 514  *   frac_size: the size of the fraction field
 515  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 516  * The following are computed based the size of fraction
 517  *   round_mask: bits below lsb which must be rounded
 518  * The following optional modifiers are available:
 519  *   arm_althp: handle ARM Alternative Half Precision
 520  */
 521 typedef struct {
 522     int exp_size;
 523     int exp_bias;
 524     int exp_max;
 525     int frac_size;
 526     int frac_shift;
 527     bool arm_althp;
 528     uint64_t round_mask;
 529 } FloatFmt;
 530
 531 /* Expand fields based on the size of exponent and fraction */
 532 #define FLOAT_PARAMS_(E)                                \
 533     .exp_size       = E,                                \
 534     .exp_bias       = ((1 << E) - 1) >> 1,              \
 535     .exp_max        = (1 << E) - 1
 536
 537 #define FLOAT_PARAMS(E, F)                              \
 538     FLOAT_PARAMS_(E),                                   \
 539     .frac_size      = F,                                \
 540     .frac_shift     = (-F - 1) & 63,                    \
 541     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
 542
 543 static const FloatFmt float16_params = {
 544     FLOAT_PARAMS(5, 10)
 545 };
 546
 547 static const FloatFmt float16_params_ahp = {
 548     FLOAT_PARAMS(5, 10),
 549     .arm_althp = true
 550 };
 551
 552 static const FloatFmt bfloat16_params = {
 553     FLOAT_PARAMS(8, 7)
 554 };
 555
 556 static const FloatFmt float32_params = {
 557     FLOAT_PARAMS(8, 23)
 558 };
 559
 560 static const FloatFmt float64_params = {
 561     FLOAT_PARAMS(11, 52)
 562 };
 563
 564 static const FloatFmt float128_params = {
 565     FLOAT_PARAMS(15, 112)
 566 };
 567
 568 #define FLOATX80_PARAMS(R)              \
 569     FLOAT_PARAMS_(15),                  \
 570     .frac_size = R == 64 ? 63 : R,      \
 571     .frac_shift = 0,                    \
 572     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 573
 574 static const FloatFmt floatx80_params[3] = {
 575     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
 576     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
 577     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
 578 };
 579
 580 /* Unpack a float to parts, but do not canonicalize.  */
 581 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 582 {
 583     const int f_size = fmt->frac_size;
 584     const int e_size = fmt->exp_size;
 585
 586     *r = (FloatParts64) {
 587         .cls = float_class_unclassified,
 588         .sign = extract64(raw, f_size + e_size, 1),
 589         .exp = extract64(raw, f_size, e_size),
 590         .frac = extract64(raw, 0, f_size)
 591     };
 592 }
 593
 594 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 595 {
 596     unpack_raw64(p, &float16_params, f);
 597 }
 598
 599 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 600 {
 601     unpack_raw64(p, &bfloat16_params, f);
 602 }
 603
 604 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 605 {
 606     unpack_raw64(p, &float32_params, f);
 607 }
 608
 609 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 610 {
 611     unpack_raw64(p, &float64_params, f);
 612 }
 613
 614 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 615 {
 616     *p = (FloatParts128) {
 617         .cls = float_class_unclassified,
 618         .sign = extract32(f.high, 15, 1),
 619         .exp = extract32(f.high, 0, 15),
 620         .frac_hi = f.low
 621     };
 622 }
 623
 624 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 625 {
 626     const int f_size = float128_params.frac_size - 64;
 627     const int e_size = float128_params.exp_size;
 628
 629     *p = (FloatParts128) {
 630         .cls = float_class_unclassified,
 631         .sign = extract64(f.high, f_size + e_size, 1),
 632         .exp = extract64(f.high, f_size, e_size),
 633         .frac_hi = extract64(f.high, 0, f_size),
 634         .frac_lo = f.low,
 635     };
 636 }
 637
 638 /* Pack a float from parts, but do not canonicalize.  */
 639 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 640 {
 641     const int f_size = fmt->frac_size;
 642     const int e_size = fmt->exp_size;
 643     uint64_t ret;
 644
 645     ret = (uint64_t)p->sign << (f_size + e_size);
 646     ret = deposit64(ret, f_size, e_size, p->exp);
 647     ret = deposit64(ret, 0, f_size, p->frac);
 648     return ret;
 649 }
 650
 651 static inline float16 float16_pack_raw(const FloatParts64 *p)
 652 {
 653     return make_float16(pack_raw64(p, &float16_params));
 654 }
 655
 656 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 657 {
 658     return pack_raw64(p, &bfloat16_params);
 659 }
 660
 661 static inline float32 float32_pack_raw(const FloatParts64 *p)
 662 {
 663     return make_float32(pack_raw64(p, &float32_params));
 664 }
 665
 666 static inline float64 float64_pack_raw(const FloatParts64 *p)
 667 {
 668     return make_float64(pack_raw64(p, &float64_params));
 669 }
 670
 671 static float128 float128_pack_raw(const FloatParts128 *p)
 672 {
 673     const int f_size = float128_params.frac_size - 64;
 674     const int e_size = float128_params.exp_size;
 675     uint64_t hi;
 676
 677     hi = (uint64_t)p->sign << (f_size + e_size);
 678     hi = deposit64(hi, f_size, e_size, p->exp);
 679     hi = deposit64(hi, 0, f_size, p->frac_hi);
 680     return make_float128(hi, p->frac_lo);
 681 }
 682
 683 /*----------------------------------------------------------------------------
 684 | Functions and definitions to determine:  (1) whether tininess for underflow
 685 | is detected before or after rounding by default, (2) what (if anything)
 686 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 687 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 688 | are propagated from function inputs to output.  These details are target-
 689 | specific.
 690 *----------------------------------------------------------------------------*/
 691 #include "softfloat-specialize.c.inc"
 692
 693 #define PARTS_GENERIC_64_128(NAME, P) \
 694     _Generic((P), FloatParts64 *: parts64_##NAME, \
 695                   FloatParts128 *: parts128_##NAME)
 696
 697 #define PARTS_GENERIC_64_128_256(NAME, P) \
 698     _Generic((P), FloatParts64 *: parts64_##NAME, \
 699                   FloatParts128 *: parts128_##NAME, \
 700                   FloatParts256 *: parts256_##NAME)
 701
 702 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 703 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 704
 705 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 706 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 707
 708 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 709
 710 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 711                                       float_status *s);
 712 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 713                                         float_status *s);
 714
 715 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 716
 717 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 718                                              FloatParts64 *c, float_status *s,
 719                                              int ab_mask, int abc_mask);
 720 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 721                                                FloatParts128 *b,
 722                                                FloatParts128 *c,
 723                                                float_status *s,
 724                                                int ab_mask, int abc_mask);
 725
 726 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 727     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 728
 729 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 730                                  const FloatFmt *fmt);
 731 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 732                                   const FloatFmt *fmt);
 733
 734 #define parts_canonicalize(A, S, F) \
 735     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 736
 737 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
 738                                    const FloatFmt *fmt);
 739 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
 740                                     const FloatFmt *fmt);
 741
 742 #define parts_uncanon_normal(A, S, F) \
 743     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
 744
 745 static void parts64_uncanon(FloatParts64 *p, float_status *status,
 746                             const FloatFmt *fmt);
 747 static void parts128_uncanon(FloatParts128 *p, float_status *status,
 748                              const FloatFmt *fmt);
 749
 750 #define parts_uncanon(A, S, F) \
 751     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 752
 753 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 754 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 755 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 756
 757 #define parts_add_normal(A, B) \
 758     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 759
 760 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 761 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 762 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 763
 764 #define parts_sub_normal(A, B) \
 765     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 766
 767 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 768                                     float_status *s, bool subtract);
 769 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 770                                       float_status *s, bool subtract);
 771
 772 #define parts_addsub(A, B, S, Z) \
 773     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 774
 775 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 776                                  float_status *s);
 777 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 778                                    float_status *s);
 779
 780 #define parts_mul(A, B, S) \
 781     PARTS_GENERIC_64_128(mul, A)(A, B, S)
 782
 783 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 784                                     FloatParts64 *c, int flags,
 785                                     float_status *s);
 786 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 787                                       FloatParts128 *c, int flags,
 788                                       float_status *s);
 789
 790 #define parts_muladd(A, B, C, Z, S) \
 791     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 792
 793 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 794                                  float_status *s);
 795 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 796                                    float_status *s);
 797
 798 #define parts_div(A, B, S) \
 799     PARTS_GENERIC_64_128(div, A)(A, B, S)
 800
 801 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
 802                                     uint64_t *mod_quot, float_status *s);
 803 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
 804                                       uint64_t *mod_quot, float_status *s);
 805
 806 #define parts_modrem(A, B, Q, S) \
 807     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
 808
 809 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
 810 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
 811
 812 #define parts_sqrt(A, S, F) \
 813     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 814
 815 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 816                                         int scale, int frac_size);
 817 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 818                                          int scale, int frac_size);
 819
 820 #define parts_round_to_int_normal(A, R, C, F) \
 821     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 822
 823 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 824                                  int scale, float_status *s,
 825                                  const FloatFmt *fmt);
 826 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 827                                   int scale, float_status *s,
 828                                   const FloatFmt *fmt);
 829
 830 #define parts_round_to_int(A, R, C, S, F) \
 831     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 832
 833 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 834                                      int scale, int64_t min, int64_t max,
 835                                      float_status *s);
 836 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 837                                      int scale, int64_t min, int64_t max,
 838                                      float_status *s);
 839
 840 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 841     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 842
 843 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
 844                                       int scale, uint64_t max,
 845                                       float_status *s);
 846 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
 847                                        int scale, uint64_t max,
 848                                        float_status *s);
 849
 850 #define parts_float_to_uint(P, R, Z, M, S) \
 851     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 852
 853 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 854                                   int scale, float_status *s);
 855 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 856                                    int scale, float_status *s);
 857
 858 #define parts_sint_to_float(P, I, Z, S) \
 859     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 860
 861 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
 862                                   int scale, float_status *s);
 863 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
 864                                    int scale, float_status *s);
 865
 866 #define parts_uint_to_float(P, I, Z, S) \
 867     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 868
 869 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
 870                                     float_status *s, int flags);
 871 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
 872                                       float_status *s, int flags);
 873
 874 #define parts_minmax(A, B, S, F) \
 875     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 876
 877 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
 878                            float_status *s, bool q);
 879 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
 880                             float_status *s, bool q);
 881
 882 #define parts_compare(A, B, S, Q) \
 883     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 884
 885 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
 886 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
 887
 888 #define parts_scalbn(A, N, S) \
 889     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 890
 891 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
 892 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
 893
 894 #define parts_log2(A, S, F) \
 895     PARTS_GENERIC_64_128(log2, A)(A, S, F)
 896
 897 /*
 898  * Helper functions for softfloat-parts.c.inc, per-size operations.
 899  */
 900
 901 #define FRAC_GENERIC_64_128(NAME, P) \
 902     _Generic((P), FloatParts64 *: frac64_##NAME, \
 903                   FloatParts128 *: frac128_##NAME)
 904
 905 #define FRAC_GENERIC_64_128_256(NAME, P) \
 906     _Generic((P), FloatParts64 *: frac64_##NAME, \
 907                   FloatParts128 *: frac128_##NAME, \
 908                   FloatParts256 *: frac256_##NAME)
 909
 910 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 911 {
 912     return uadd64_overflow(a->frac, b->frac, &r->frac);
 913 }
 914
 915 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 916 {
 917     bool c = 0;
 918     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 919     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 920     return c;
 921 }
 922
 923 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 924 {
 925     bool c = 0;
 926     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 927     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 928     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 929     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 930     return c;
 931 }
 932
 933 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 934
 935 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 936 {
 937     return uadd64_overflow(a->frac, c, &r->frac);
 938 }
 939
 940 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 941 {
 942     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 943     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 944 }
 945
 946 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 947
 948 static void frac64_allones(FloatParts64 *a)
 949 {
 950     a->frac = -1;
 951 }
 952
 953 static void frac128_allones(FloatParts128 *a)
 954 {
 955     a->frac_hi = a->frac_lo = -1;
 956 }
 957
 958 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 959
 960 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 961 {
 962     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 963 }
 964
 965 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 966 {
 967     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 968     if (ta == tb) {
 969         ta = a->frac_lo, tb = b->frac_lo;
 970         if (ta == tb) {
 971             return 0;
 972         }
 973     }
 974     return ta < tb ? -1 : 1;
 975 }
 976
 977 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 978
 979 static void frac64_clear(FloatParts64 *a)
 980 {
 981     a->frac = 0;
 982 }
 983
 984 static void frac128_clear(FloatParts128 *a)
 985 {
 986     a->frac_hi = a->frac_lo = 0;
 987 }
 988
 989 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 990
 991 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 992 {
 993     uint64_t n1, n0, r, q;
 994     bool ret;
 995
 996     /*
 997      * We want a 2*N / N-bit division to produce exactly an N-bit
 998      * result, so that we do not lose any precision and so that we
 999      * do not have to renormalize afterward.  If A.frac < B.frac,
1000      * then division would produce an (N-1)-bit result; shift A left
1001      * by one to produce the an N-bit result, and return true to
1002      * decrement the exponent to match.
1003      *
1004      * The udiv_qrnnd algorithm that we're using requires normalization,
1005      * i.e. the msb of the denominator must be set, which is already true.
1006      */
1007     ret = a->frac < b->frac;
1008     if (ret) {
1009         n0 = a->frac;
1010         n1 = 0;
1011     } else {
1012         n0 = a->frac >> 1;
1013         n1 = a->frac << 63;
1014     }
1015     q = udiv_qrnnd(&r, n0, n1, b->frac);
1016
1017     /* Set lsb if there is a remainder, to set inexact. */
1018     a->frac = q | (r != 0);
1019
1020     return ret;
1021 }
1022
1023 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1024 {
1025     uint64_t q0, q1, a0, a1, b0, b1;
1026     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1027     bool ret = false;
1028
1029     a0 = a->frac_hi, a1 = a->frac_lo;
1030     b0 = b->frac_hi, b1 = b->frac_lo;
1031
1032     ret = lt128(a0, a1, b0, b1);
1033     if (!ret) {
1034         a1 = shr_double(a0, a1, 1);
1035         a0 = a0 >> 1;
1036     }
1037
1038     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1039     q0 = estimateDiv128To64(a0, a1, b0);
1040
1041     /*
1042      * Estimate is high because B1 was not included (unless B1 == 0).
1043      * Reduce quotient and increase remainder until remainder is non-negative.
1044      * This loop will execute 0 to 2 times.
1045      */
1046     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1047     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1048     while (r0 != 0) {
1049         q0--;
1050         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1051     }
1052
1053     /* Repeat using the remainder, producing a second word of quotient. */
1054     q1 = estimateDiv128To64(r1, r2, b0);
1055     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1056     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1057     while (r1 != 0) {
1058         q1--;
1059         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1060     }
1061
1062     /* Any remainder indicates inexact; set sticky bit. */
1063     q1 |= (r2 | r3) != 0;
1064
1065     a->frac_hi = q0;
1066     a->frac_lo = q1;
1067     return ret;
1068 }
1069
1070 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1071
1072 static bool frac64_eqz(FloatParts64 *a)
1073 {
1074     return a->frac == 0;
1075 }
1076
1077 static bool frac128_eqz(FloatParts128 *a)
1078 {
1079     return (a->frac_hi | a->frac_lo) == 0;
1080 }
1081
1082 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1083
1084 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1085 {
1086     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1087 }
1088
1089 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1090 {
1091     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1092                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1093 }
1094
1095 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1096
1097 static void frac64_neg(FloatParts64 *a)
1098 {
1099     a->frac = -a->frac;
1100 }
1101
1102 static void frac128_neg(FloatParts128 *a)
1103 {
1104     bool c = 0;
1105     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1106     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1107 }
1108
1109 static void frac256_neg(FloatParts256 *a)
1110 {
1111     bool c = 0;
1112     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1113     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1114     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1115     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1116 }
1117
1118 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1119
1120 static int frac64_normalize(FloatParts64 *a)
1121 {
1122     if (a->frac) {
1123         int shift = clz64(a->frac);
1124         a->frac <<= shift;
1125         return shift;
1126     }
1127     return 64;
1128 }
1129
1130 static int frac128_normalize(FloatParts128 *a)
1131 {
1132     if (a->frac_hi) {
1133         int shl = clz64(a->frac_hi);
1134         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1135         a->frac_lo <<= shl;
1136         return shl;
1137     } else if (a->frac_lo) {
1138         int shl = clz64(a->frac_lo);
1139         a->frac_hi = a->frac_lo << shl;
1140         a->frac_lo = 0;
1141         return shl + 64;
1142     }
1143     return 128;
1144 }
1145
1146 static int frac256_normalize(FloatParts256 *a)
1147 {
1148     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1149     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1150     int ret, shl;
1151
1152     if (likely(a0)) {
1153         shl = clz64(a0);
1154         if (shl == 0) {
1155             return 0;
1156         }
1157         ret = shl;
1158     } else {
1159         if (a1) {
1160             ret = 64;
1161             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1162         } else if (a2) {
1163             ret = 128;
1164             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1165         } else if (a3) {
1166             ret = 192;
1167             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1168         } else {
1169             ret = 256;
1170             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1171             goto done;
1172         }
1173         shl = clz64(a0);
1174         if (shl == 0) {
1175             goto done;
1176         }
1177         ret += shl;
1178     }
1179
1180     a0 = shl_double(a0, a1, shl);
1181     a1 = shl_double(a1, a2, shl);
1182     a2 = shl_double(a2, a3, shl);
1183     a3 <<= shl;
1184
1185  done:
1186     a->frac_hi = a0;
1187     a->frac_hm = a1;
1188     a->frac_lm = a2;
1189     a->frac_lo = a3;
1190     return ret;
1191 }
1192
1193 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1194
1195 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1196 {
1197     uint64_t a0, a1, b0, t0, t1, q, quot;
1198     int exp_diff = a->exp - b->exp;
1199     int shift;
1200
1201     a0 = a->frac;
1202     a1 = 0;
1203
1204     if (exp_diff < -1) {
1205         if (mod_quot) {
1206             *mod_quot = 0;
1207         }
1208         return;
1209     }
1210     if (exp_diff == -1) {
1211         a0 >>= 1;
1212         exp_diff = 0;
1213     }
1214
1215     b0 = b->frac;
1216     quot = q = b0 <= a0;
1217     if (q) {
1218         a0 -= b0;
1219     }
1220
1221     exp_diff -= 64;
1222     while (exp_diff > 0) {
1223         q = estimateDiv128To64(a0, a1, b0);
1224         q = q > 2 ? q - 2 : 0;
1225         mul64To128(b0, q, &t0, &t1);
1226         sub128(a0, a1, t0, t1, &a0, &a1);
1227         shortShift128Left(a0, a1, 62, &a0, &a1);
1228         exp_diff -= 62;
1229         quot = (quot << 62) + q;
1230     }
1231
1232     exp_diff += 64;
1233     if (exp_diff > 0) {
1234         q = estimateDiv128To64(a0, a1, b0);
1235         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1236         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1237         sub128(a0, a1, t0, t1, &a0, &a1);
1238         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1239         while (le128(t0, t1, a0, a1)) {
1240             ++q;
1241             sub128(a0, a1, t0, t1, &a0, &a1);
1242         }
1243         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1244     } else {
1245         t0 = b0;
1246         t1 = 0;
1247     }
1248
1249     if (mod_quot) {
1250         *mod_quot = quot;
1251     } else {
1252         sub128(t0, t1, a0, a1, &t0, &t1);
1253         if (lt128(t0, t1, a0, a1) ||
1254             (eq128(t0, t1, a0, a1) && (q & 1))) {
1255             a0 = t0;
1256             a1 = t1;
1257             a->sign = !a->sign;
1258         }
1259     }
1260
1261     if (likely(a0)) {
1262         shift = clz64(a0);
1263         shortShift128Left(a0, a1, shift, &a0, &a1);
1264     } else if (likely(a1)) {
1265         shift = clz64(a1);
1266         a0 = a1 << shift;
1267         a1 = 0;
1268         shift += 64;
1269     } else {
1270         a->cls = float_class_zero;
1271         return;
1272     }
1273
1274     a->exp = b->exp + exp_diff - shift;
1275     a->frac = a0 | (a1 != 0);
1276 }
1277
1278 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1279                            uint64_t *mod_quot)
1280 {
1281     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1282     int exp_diff = a->exp - b->exp;
1283     int shift;
1284
1285     a0 = a->frac_hi;
1286     a1 = a->frac_lo;
1287     a2 = 0;
1288
1289     if (exp_diff < -1) {
1290         if (mod_quot) {
1291             *mod_quot = 0;
1292         }
1293         return;
1294     }
1295     if (exp_diff == -1) {
1296         shift128Right(a0, a1, 1, &a0, &a1);
1297         exp_diff = 0;
1298     }
1299
1300     b0 = b->frac_hi;
1301     b1 = b->frac_lo;
1302
1303     quot = q = le128(b0, b1, a0, a1);
1304     if (q) {
1305         sub128(a0, a1, b0, b1, &a0, &a1);
1306     }
1307
1308     exp_diff -= 64;
1309     while (exp_diff > 0) {
1310         q = estimateDiv128To64(a0, a1, b0);
1311         q = q > 4 ? q - 4 : 0;
1312         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1313         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1314         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1315         exp_diff -= 61;
1316         quot = (quot << 61) + q;
1317     }
1318
1319     exp_diff += 64;
1320     if (exp_diff > 0) {
1321         q = estimateDiv128To64(a0, a1, b0);
1322         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1323         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1324         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1325         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1326         while (le192(t0, t1, t2, a0, a1, a2)) {
1327             ++q;
1328             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1329         }
1330         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1331     } else {
1332         t0 = b0;
1333         t1 = b1;
1334         t2 = 0;
1335     }
1336
1337     if (mod_quot) {
1338         *mod_quot = quot;
1339     } else {
1340         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1341         if (lt192(t0, t1, t2, a0, a1, a2) ||
1342             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1343             a0 = t0;
1344             a1 = t1;
1345             a2 = t2;
1346             a->sign = !a->sign;
1347         }
1348     }
1349
1350     if (likely(a0)) {
1351         shift = clz64(a0);
1352         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1353     } else if (likely(a1)) {
1354         shift = clz64(a1);
1355         shortShift128Left(a1, a2, shift, &a0, &a1);
1356         a2 = 0;
1357         shift += 64;
1358     } else if (likely(a2)) {
1359         shift = clz64(a2);
1360         a0 = a2 << shift;
1361         a1 = a2 = 0;
1362         shift += 128;
1363     } else {
1364         a->cls = float_class_zero;
1365         return;
1366     }
1367
1368     a->exp = b->exp + exp_diff - shift;
1369     a->frac_hi = a0;
1370     a->frac_lo = a1 | (a2 != 0);
1371 }
1372
1373 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1374
1375 static void frac64_shl(FloatParts64 *a, int c)
1376 {
1377     a->frac <<= c;
1378 }
1379
1380 static void frac128_shl(FloatParts128 *a, int c)
1381 {
1382     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1383
1384     if (c & 64) {
1385         a0 = a1, a1 = 0;
1386     }
1387
1388     c &= 63;
1389     if (c) {
1390         a0 = shl_double(a0, a1, c);
1391         a1 = a1 << c;
1392     }
1393
1394     a->frac_hi = a0;
1395     a->frac_lo = a1;
1396 }
1397
1398 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1399
1400 static void frac64_shr(FloatParts64 *a, int c)
1401 {
1402     a->frac >>= c;
1403 }
1404
1405 static void frac128_shr(FloatParts128 *a, int c)
1406 {
1407     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1408
1409     if (c & 64) {
1410         a1 = a0, a0 = 0;
1411     }
1412
1413     c &= 63;
1414     if (c) {
1415         a1 = shr_double(a0, a1, c);
1416         a0 = a0 >> c;
1417     }
1418
1419     a->frac_hi = a0;
1420     a->frac_lo = a1;
1421 }
1422
1423 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1424
1425 static void frac64_shrjam(FloatParts64 *a, int c)
1426 {
1427     uint64_t a0 = a->frac;
1428
1429     if (likely(c != 0)) {
1430         if (likely(c < 64)) {
1431             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1432         } else {
1433             a0 = a0 != 0;
1434         }
1435         a->frac = a0;
1436     }
1437 }
1438
1439 static void frac128_shrjam(FloatParts128 *a, int c)
1440 {
1441     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1442     uint64_t sticky = 0;
1443
1444     if (unlikely(c == 0)) {
1445         return;
1446     } else if (likely(c < 64)) {
1447         /* nothing */
1448     } else if (likely(c < 128)) {
1449         sticky = a1;
1450         a1 = a0;
1451         a0 = 0;
1452         c &= 63;
1453         if (c == 0) {
1454             goto done;
1455         }
1456     } else {
1457         sticky = a0 | a1;
1458         a0 = a1 = 0;
1459         goto done;
1460     }
1461
1462     sticky |= shr_double(a1, 0, c);
1463     a1 = shr_double(a0, a1, c);
1464     a0 = a0 >> c;
1465
1466  done:
1467     a->frac_lo = a1 | (sticky != 0);
1468     a->frac_hi = a0;
1469 }
1470
1471 static void frac256_shrjam(FloatParts256 *a, int c)
1472 {
1473     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1474     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1475     uint64_t sticky = 0;
1476
1477     if (unlikely(c == 0)) {
1478         return;
1479     } else if (likely(c < 64)) {
1480         /* nothing */
1481     } else if (likely(c < 256)) {
1482         if (unlikely(c & 128)) {
1483             sticky |= a2 | a3;
1484             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1485         }
1486         if (unlikely(c & 64)) {
1487             sticky |= a3;
1488             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1489         }
1490         c &= 63;
1491         if (c == 0) {
1492             goto done;
1493         }
1494     } else {
1495         sticky = a0 | a1 | a2 | a3;
1496         a0 = a1 = a2 = a3 = 0;
1497         goto done;
1498     }
1499
1500     sticky |= shr_double(a3, 0, c);
1501     a3 = shr_double(a2, a3, c);
1502     a2 = shr_double(a1, a2, c);
1503     a1 = shr_double(a0, a1, c);
1504     a0 = a0 >> c;
1505
1506  done:
1507     a->frac_lo = a3 | (sticky != 0);
1508     a->frac_lm = a2;
1509     a->frac_hm = a1;
1510     a->frac_hi = a0;
1511 }
1512
1513 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1514
1515 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1516 {
1517     return usub64_overflow(a->frac, b->frac, &r->frac);
1518 }
1519
1520 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1521 {
1522     bool c = 0;
1523     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1524     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1525     return c;
1526 }
1527
1528 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1529 {
1530     bool c = 0;
1531     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1532     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1533     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1534     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1535     return c;
1536 }
1537
1538 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1539
1540 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1541 {
1542     r->frac = a->frac_hi | (a->frac_lo != 0);
1543 }
1544
1545 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1546 {
1547     r->frac_hi = a->frac_hi;
1548     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1549 }
1550
1551 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1552
1553 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1554 {
1555     r->frac_hi = a->frac;
1556     r->frac_lo = 0;
1557 }
1558
1559 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1560 {
1561     r->frac_hi = a->frac_hi;
1562     r->frac_hm = a->frac_lo;
1563     r->frac_lm = 0;
1564     r->frac_lo = 0;
1565 }
1566
1567 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1568
1569 /*
1570  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1571  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1572  * and thus MIT licenced.
1573  */
1574 static const uint16_t rsqrt_tab[128] = {
1575     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1576     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1577     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1578     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1579     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1580     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1581     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1582     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1583     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1584     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1585     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1586     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1587     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1588     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1589     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1590     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1591 };
1592
1593 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1594 #define FloatPartsN    glue(FloatParts,N)
1595 #define FloatPartsW    glue(FloatParts,W)
1596
1597 #define N 64
1598 #define W 128
1599
1600 #include "softfloat-parts-addsub.c.inc"
1601 #include "softfloat-parts.c.inc"
1602
1603 #undef  N
1604 #undef  W
1605 #define N 128
1606 #define W 256
1607
1608 #include "softfloat-parts-addsub.c.inc"
1609 #include "softfloat-parts.c.inc"
1610
1611 #undef  N
1612 #undef  W
1613 #define N            256
1614
1615 #include "softfloat-parts-addsub.c.inc"
1616
1617 #undef  N
1618 #undef  W
1619 #undef  partsN
1620 #undef  FloatPartsN
1621 #undef  FloatPartsW
1622
1623 /*
1624  * Pack/unpack routines with a specific FloatFmt.
1625  */
1626
1627 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1628                                       float_status *s, const FloatFmt *params)
1629 {
1630     float16_unpack_raw(p, f);
1631     parts_canonicalize(p, s, params);
1632 }
1633
1634 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1635                                      float_status *s)
1636 {
1637     float16a_unpack_canonical(p, f, s, &float16_params);
1638 }
1639
1640 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1641                                       float_status *s)
1642 {
1643     bfloat16_unpack_raw(p, f);
1644     parts_canonicalize(p, s, &bfloat16_params);
1645 }
1646
1647 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1648                                              float_status *s,
1649                                              const FloatFmt *params)
1650 {
1651     parts_uncanon(p, s, params);
1652     return float16_pack_raw(p);
1653 }
1654
1655 static float16 float16_round_pack_canonical(FloatParts64 *p,
1656                                             float_status *s)
1657 {
1658     return float16a_round_pack_canonical(p, s, &float16_params);
1659 }
1660
1661 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1662                                               float_status *s)
1663 {
1664     parts_uncanon(p, s, &bfloat16_params);
1665     return bfloat16_pack_raw(p);
1666 }
1667
1668 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1669                                      float_status *s)
1670 {
1671     float32_unpack_raw(p, f);
1672     parts_canonicalize(p, s, &float32_params);
1673 }
1674
1675 static float32 float32_round_pack_canonical(FloatParts64 *p,
1676                                             float_status *s)
1677 {
1678     parts_uncanon(p, s, &float32_params);
1679     return float32_pack_raw(p);
1680 }
1681
1682 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1683                                      float_status *s)
1684 {
1685     float64_unpack_raw(p, f);
1686     parts_canonicalize(p, s, &float64_params);
1687 }
1688
1689 static float64 float64_round_pack_canonical(FloatParts64 *p,
1690                                             float_status *s)
1691 {
1692     parts_uncanon(p, s, &float64_params);
1693     return float64_pack_raw(p);
1694 }
1695
1696 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1697                                                float_status *s)
1698 {
1699     parts_uncanon(p, s, &float32_params);
1700
1701     /*
1702      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1703      * We need to adjust the fraction higher so that the least N bits are
1704      * zero, and the fraction is adjacent to the float64 implicit bit.
1705      */
1706     switch (p->cls) {
1707     case float_class_normal:
1708         if (unlikely(p->exp == 0)) {
1709             /*
1710              * The result is denormal for float32, but can be represented
1711              * in normalized form for float64.  Adjust, per canonicalize.
1712              */
1713             int shift = frac_normalize(p);
1714             p->exp = (float32_params.frac_shift -
1715                       float32_params.exp_bias - shift + 1 +
1716                       float64_params.exp_bias);
1717             frac_shr(p, float64_params.frac_shift);
1718         } else {
1719             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1720             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1721         }
1722         break;
1723     case float_class_snan:
1724     case float_class_qnan:
1725         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1726         p->exp = float64_params.exp_max;
1727         break;
1728     case float_class_inf:
1729         p->exp = float64_params.exp_max;
1730         break;
1731     case float_class_zero:
1732         break;
1733     default:
1734         g_assert_not_reached();
1735     }
1736
1737     return float64_pack_raw(p);
1738 }
1739
1740 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1741                                       float_status *s)
1742 {
1743     float128_unpack_raw(p, f);
1744     parts_canonicalize(p, s, &float128_params);
1745 }
1746
1747 static float128 float128_round_pack_canonical(FloatParts128 *p,
1748                                               float_status *s)
1749 {
1750     parts_uncanon(p, s, &float128_params);
1751     return float128_pack_raw(p);
1752 }
1753
1754 /* Returns false if the encoding is invalid. */
1755 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1756                                       float_status *s)
1757 {
1758     /* Ensure rounding precision is set before beginning. */
1759     switch (s->floatx80_rounding_precision) {
1760     case floatx80_precision_x:
1761     case floatx80_precision_d:
1762     case floatx80_precision_s:
1763         break;
1764     default:
1765         g_assert_not_reached();
1766     }
1767
1768     if (unlikely(floatx80_invalid_encoding(f))) {
1769         float_raise(float_flag_invalid, s);
1770         return false;
1771     }
1772
1773     floatx80_unpack_raw(p, f);
1774
1775     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1776         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1777     } else {
1778         /* The explicit integer bit is ignored, after invalid checks. */
1779         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1780         p->cls = (p->frac_hi == 0 ? float_class_inf
1781                   : parts_is_snan_frac(p->frac_hi, s)
1782                   ? float_class_snan : float_class_qnan);
1783     }
1784     return true;
1785 }
1786
1787 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1788                                               float_status *s)
1789 {
1790     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1791     uint64_t frac;
1792     int exp;
1793
1794     switch (p->cls) {
1795     case float_class_normal:
1796         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1797             parts_uncanon_normal(p, s, fmt);
1798             frac = p->frac_hi;
1799             exp = p->exp;
1800         } else {
1801             FloatParts64 p64;
1802
1803             p64.sign = p->sign;
1804             p64.exp = p->exp;
1805             frac_truncjam(&p64, p);
1806             parts_uncanon_normal(&p64, s, fmt);
1807             frac = p64.frac;
1808             exp = p64.exp;
1809         }
1810         if (exp != fmt->exp_max) {
1811             break;
1812         }
1813         /* rounded to inf -- fall through to set frac correctly */
1814
1815     case float_class_inf:
1816         /* x86 and m68k differ in the setting of the integer bit. */
1817         frac = floatx80_infinity_low;
1818         exp = fmt->exp_max;
1819         break;
1820
1821     case float_class_zero:
1822         frac = 0;
1823         exp = 0;
1824         break;
1825
1826     case float_class_snan:
1827     case float_class_qnan:
1828         /* NaNs have the integer bit set. */
1829         frac = p->frac_hi | (1ull << 63);
1830         exp = fmt->exp_max;
1831         break;
1832
1833     default:
1834         g_assert_not_reached();
1835     }
1836
1837     return packFloatx80(p->sign, exp, frac);
1838 }
1839
1840 /*
1841  * Addition and subtraction
1842  */
1843
1844 static float16 QEMU_FLATTEN
1845 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1846 {
1847     FloatParts64 pa, pb, *pr;
1848
1849     float16_unpack_canonical(&pa, a, status);
1850     float16_unpack_canonical(&pb, b, status);
1851     pr = parts_addsub(&pa, &pb, status, subtract);
1852
1853     return float16_round_pack_canonical(pr, status);
1854 }
1855
1856 float16 float16_add(float16 a, float16 b, float_status *status)
1857 {
1858     return float16_addsub(a, b, status, false);
1859 }
1860
1861 float16 float16_sub(float16 a, float16 b, float_status *status)
1862 {
1863     return float16_addsub(a, b, status, true);
1864 }
1865
1866 static float32 QEMU_SOFTFLOAT_ATTR
1867 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1868 {
1869     FloatParts64 pa, pb, *pr;
1870
1871     float32_unpack_canonical(&pa, a, status);
1872     float32_unpack_canonical(&pb, b, status);
1873     pr = parts_addsub(&pa, &pb, status, subtract);
1874
1875     return float32_round_pack_canonical(pr, status);
1876 }
1877
1878 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1879 {
1880     return soft_f32_addsub(a, b, status, false);
1881 }
1882
1883 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1884 {
1885     return soft_f32_addsub(a, b, status, true);
1886 }
1887
1888 static float64 QEMU_SOFTFLOAT_ATTR
1889 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1890 {
1891     FloatParts64 pa, pb, *pr;
1892
1893     float64_unpack_canonical(&pa, a, status);
1894     float64_unpack_canonical(&pb, b, status);
1895     pr = parts_addsub(&pa, &pb, status, subtract);
1896
1897     return float64_round_pack_canonical(pr, status);
1898 }
1899
1900 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1901 {
1902     return soft_f64_addsub(a, b, status, false);
1903 }
1904
1905 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1906 {
1907     return soft_f64_addsub(a, b, status, true);
1908 }
1909
1910 static float hard_f32_add(float a, float b)
1911 {
1912     return a + b;
1913 }
1914
1915 static float hard_f32_sub(float a, float b)
1916 {
1917     return a - b;
1918 }
1919
1920 static double hard_f64_add(double a, double b)
1921 {
1922     return a + b;
1923 }
1924
1925 static double hard_f64_sub(double a, double b)
1926 {
1927     return a - b;
1928 }
1929
1930 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1931 {
1932     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1933         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1934     }
1935     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1936 }
1937
1938 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1939 {
1940     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1941         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1942     } else {
1943         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1944     }
1945 }
1946
1947 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1948                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1949 {
1950     return float32_gen2(a, b, s, hard, soft,
1951                         f32_is_zon2, f32_addsubmul_post);
1952 }
1953
1954 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1955                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1956 {
1957     return float64_gen2(a, b, s, hard, soft,
1958                         f64_is_zon2, f64_addsubmul_post);
1959 }
1960
1961 float32 QEMU_FLATTEN
1962 float32_add(float32 a, float32 b, float_status *s)
1963 {
1964     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1965 }
1966
1967 float32 QEMU_FLATTEN
1968 float32_sub(float32 a, float32 b, float_status *s)
1969 {
1970     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1971 }
1972
1973 float64 QEMU_FLATTEN
1974 float64_add(float64 a, float64 b, float_status *s)
1975 {
1976     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1977 }
1978
1979 float64 QEMU_FLATTEN
1980 float64_sub(float64 a, float64 b, float_status *s)
1981 {
1982     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1983 }
1984
1985 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1986                                  bool subtract)
1987 {
1988     FloatParts64 pa, pb, *pr;
1989
1990     float64_unpack_canonical(&pa, a, status);
1991     float64_unpack_canonical(&pb, b, status);
1992     pr = parts_addsub(&pa, &pb, status, subtract);
1993
1994     return float64r32_round_pack_canonical(pr, status);
1995 }
1996
1997 float64 float64r32_add(float64 a, float64 b, float_status *status)
1998 {
1999     return float64r32_addsub(a, b, status, false);
2000 }
2001
2002 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2003 {
2004     return float64r32_addsub(a, b, status, true);
2005 }
2006
2007 static bfloat16 QEMU_FLATTEN
2008 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2009 {
2010     FloatParts64 pa, pb, *pr;
2011
2012     bfloat16_unpack_canonical(&pa, a, status);
2013     bfloat16_unpack_canonical(&pb, b, status);
2014     pr = parts_addsub(&pa, &pb, status, subtract);
2015
2016     return bfloat16_round_pack_canonical(pr, status);
2017 }
2018
2019 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2020 {
2021     return bfloat16_addsub(a, b, status, false);
2022 }
2023
2024 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2025 {
2026     return bfloat16_addsub(a, b, status, true);
2027 }
2028
2029 static float128 QEMU_FLATTEN
2030 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2031 {
2032     FloatParts128 pa, pb, *pr;
2033
2034     float128_unpack_canonical(&pa, a, status);
2035     float128_unpack_canonical(&pb, b, status);
2036     pr = parts_addsub(&pa, &pb, status, subtract);
2037
2038     return float128_round_pack_canonical(pr, status);
2039 }
2040
2041 float128 float128_add(float128 a, float128 b, float_status *status)
2042 {
2043     return float128_addsub(a, b, status, false);
2044 }
2045
2046 float128 float128_sub(float128 a, float128 b, float_status *status)
2047 {
2048     return float128_addsub(a, b, status, true);
2049 }
2050
2051 static floatx80 QEMU_FLATTEN
2052 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2053 {
2054     FloatParts128 pa, pb, *pr;
2055
2056     if (!floatx80_unpack_canonical(&pa, a, status) ||
2057         !floatx80_unpack_canonical(&pb, b, status)) {
2058         return floatx80_default_nan(status);
2059     }
2060
2061     pr = parts_addsub(&pa, &pb, status, subtract);
2062     return floatx80_round_pack_canonical(pr, status);
2063 }
2064
2065 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2066 {
2067     return floatx80_addsub(a, b, status, false);
2068 }
2069
2070 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2071 {
2072     return floatx80_addsub(a, b, status, true);
2073 }
2074
2075 /*
2076  * Multiplication
2077  */
2078
2079 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2080 {
2081     FloatParts64 pa, pb, *pr;
2082
2083     float16_unpack_canonical(&pa, a, status);
2084     float16_unpack_canonical(&pb, b, status);
2085     pr = parts_mul(&pa, &pb, status);
2086
2087     return float16_round_pack_canonical(pr, status);
2088 }
2089
2090 static float32 QEMU_SOFTFLOAT_ATTR
2091 soft_f32_mul(float32 a, float32 b, float_status *status)
2092 {
2093     FloatParts64 pa, pb, *pr;
2094
2095     float32_unpack_canonical(&pa, a, status);
2096     float32_unpack_canonical(&pb, b, status);
2097     pr = parts_mul(&pa, &pb, status);
2098
2099     return float32_round_pack_canonical(pr, status);
2100 }
2101
2102 static float64 QEMU_SOFTFLOAT_ATTR
2103 soft_f64_mul(float64 a, float64 b, float_status *status)
2104 {
2105     FloatParts64 pa, pb, *pr;
2106
2107     float64_unpack_canonical(&pa, a, status);
2108     float64_unpack_canonical(&pb, b, status);
2109     pr = parts_mul(&pa, &pb, status);
2110
2111     return float64_round_pack_canonical(pr, status);
2112 }
2113
2114 static float hard_f32_mul(float a, float b)
2115 {
2116     return a * b;
2117 }
2118
2119 static double hard_f64_mul(double a, double b)
2120 {
2121     return a * b;
2122 }
2123
2124 float32 QEMU_FLATTEN
2125 float32_mul(float32 a, float32 b, float_status *s)
2126 {
2127     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2128                         f32_is_zon2, f32_addsubmul_post);
2129 }
2130
2131 float64 QEMU_FLATTEN
2132 float64_mul(float64 a, float64 b, float_status *s)
2133 {
2134     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2135                         f64_is_zon2, f64_addsubmul_post);
2136 }
2137
2138 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2139 {
2140     FloatParts64 pa, pb, *pr;
2141
2142     float64_unpack_canonical(&pa, a, status);
2143     float64_unpack_canonical(&pb, b, status);
2144     pr = parts_mul(&pa, &pb, status);
2145
2146     return float64r32_round_pack_canonical(pr, status);
2147 }
2148
2149 bfloat16 QEMU_FLATTEN
2150 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2151 {
2152     FloatParts64 pa, pb, *pr;
2153
2154     bfloat16_unpack_canonical(&pa, a, status);
2155     bfloat16_unpack_canonical(&pb, b, status);
2156     pr = parts_mul(&pa, &pb, status);
2157
2158     return bfloat16_round_pack_canonical(pr, status);
2159 }
2160
2161 float128 QEMU_FLATTEN
2162 float128_mul(float128 a, float128 b, float_status *status)
2163 {
2164     FloatParts128 pa, pb, *pr;
2165
2166     float128_unpack_canonical(&pa, a, status);
2167     float128_unpack_canonical(&pb, b, status);
2168     pr = parts_mul(&pa, &pb, status);
2169
2170     return float128_round_pack_canonical(pr, status);
2171 }
2172
2173 floatx80 QEMU_FLATTEN
2174 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2175 {
2176     FloatParts128 pa, pb, *pr;
2177
2178     if (!floatx80_unpack_canonical(&pa, a, status) ||
2179         !floatx80_unpack_canonical(&pb, b, status)) {
2180         return floatx80_default_nan(status);
2181     }
2182
2183     pr = parts_mul(&pa, &pb, status);
2184     return floatx80_round_pack_canonical(pr, status);
2185 }
2186
2187 /*
2188  * Fused multiply-add
2189  */
2190
2191 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2192                                     int flags, float_status *status)
2193 {
2194     FloatParts64 pa, pb, pc, *pr;
2195
2196     float16_unpack_canonical(&pa, a, status);
2197     float16_unpack_canonical(&pb, b, status);
2198     float16_unpack_canonical(&pc, c, status);
2199     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2200
2201     return float16_round_pack_canonical(pr, status);
2202 }
2203
2204 static float32 QEMU_SOFTFLOAT_ATTR
2205 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2206                 float_status *status)
2207 {
2208     FloatParts64 pa, pb, pc, *pr;
2209
2210     float32_unpack_canonical(&pa, a, status);
2211     float32_unpack_canonical(&pb, b, status);
2212     float32_unpack_canonical(&pc, c, status);
2213     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2214
2215     return float32_round_pack_canonical(pr, status);
2216 }
2217
2218 static float64 QEMU_SOFTFLOAT_ATTR
2219 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2220                 float_status *status)
2221 {
2222     FloatParts64 pa, pb, pc, *pr;
2223
2224     float64_unpack_canonical(&pa, a, status);
2225     float64_unpack_canonical(&pb, b, status);
2226     float64_unpack_canonical(&pc, c, status);
2227     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2228
2229     return float64_round_pack_canonical(pr, status);
2230 }
2231
2232 static bool force_soft_fma;
2233
2234 float32 QEMU_FLATTEN
2235 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2236 {
2237     union_float32 ua, ub, uc, ur;
2238
2239     ua.s = xa;
2240     ub.s = xb;
2241     uc.s = xc;
2242
2243     if (unlikely(!can_use_fpu(s))) {
2244         goto soft;
2245     }
2246     if (unlikely(flags & float_muladd_halve_result)) {
2247         goto soft;
2248     }
2249
2250     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2251     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2252         goto soft;
2253     }
2254
2255     if (unlikely(force_soft_fma)) {
2256         goto soft;
2257     }
2258
2259     /*
2260      * When (a || b) == 0, there's no need to check for under/over flow,
2261      * since we know the addend is (normal || 0) and the product is 0.
2262      */
2263     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2264         union_float32 up;
2265         bool prod_sign;
2266
2267         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2268         prod_sign ^= !!(flags & float_muladd_negate_product);
2269         up.s = float32_set_sign(float32_zero, prod_sign);
2270
2271         if (flags & float_muladd_negate_c) {
2272             uc.h = -uc.h;
2273         }
2274         ur.h = up.h + uc.h;
2275     } else {
2276         union_float32 ua_orig = ua;
2277         union_float32 uc_orig = uc;
2278
2279         if (flags & float_muladd_negate_product) {
2280             ua.h = -ua.h;
2281         }
2282         if (flags & float_muladd_negate_c) {
2283             uc.h = -uc.h;
2284         }
2285
2286         ur.h = fmaf(ua.h, ub.h, uc.h);
2287
2288         if (unlikely(f32_is_inf(ur))) {
2289             float_raise(float_flag_overflow, s);
2290         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2291             ua = ua_orig;
2292             uc = uc_orig;
2293             goto soft;
2294         }
2295     }
2296     if (flags & float_muladd_negate_result) {
2297         return float32_chs(ur.s);
2298     }
2299     return ur.s;
2300
2301  soft:
2302     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2303 }
2304
2305 float64 QEMU_FLATTEN
2306 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2307 {
2308     union_float64 ua, ub, uc, ur;
2309
2310     ua.s = xa;
2311     ub.s = xb;
2312     uc.s = xc;
2313
2314     if (unlikely(!can_use_fpu(s))) {
2315         goto soft;
2316     }
2317     if (unlikely(flags & float_muladd_halve_result)) {
2318         goto soft;
2319     }
2320
2321     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2322     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2323         goto soft;
2324     }
2325
2326     if (unlikely(force_soft_fma)) {
2327         goto soft;
2328     }
2329
2330     /*
2331      * When (a || b) == 0, there's no need to check for under/over flow,
2332      * since we know the addend is (normal || 0) and the product is 0.
2333      */
2334     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2335         union_float64 up;
2336         bool prod_sign;
2337
2338         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2339         prod_sign ^= !!(flags & float_muladd_negate_product);
2340         up.s = float64_set_sign(float64_zero, prod_sign);
2341
2342         if (flags & float_muladd_negate_c) {
2343             uc.h = -uc.h;
2344         }
2345         ur.h = up.h + uc.h;
2346     } else {
2347         union_float64 ua_orig = ua;
2348         union_float64 uc_orig = uc;
2349
2350         if (flags & float_muladd_negate_product) {
2351             ua.h = -ua.h;
2352         }
2353         if (flags & float_muladd_negate_c) {
2354             uc.h = -uc.h;
2355         }
2356
2357         ur.h = fma(ua.h, ub.h, uc.h);
2358
2359         if (unlikely(f64_is_inf(ur))) {
2360             float_raise(float_flag_overflow, s);
2361         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2362             ua = ua_orig;
2363             uc = uc_orig;
2364             goto soft;
2365         }
2366     }
2367     if (flags & float_muladd_negate_result) {
2368         return float64_chs(ur.s);
2369     }
2370     return ur.s;
2371
2372  soft:
2373     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2374 }
2375
2376 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2377                           int flags, float_status *status)
2378 {
2379     FloatParts64 pa, pb, pc, *pr;
2380
2381     float64_unpack_canonical(&pa, a, status);
2382     float64_unpack_canonical(&pb, b, status);
2383     float64_unpack_canonical(&pc, c, status);
2384     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2385
2386     return float64r32_round_pack_canonical(pr, status);
2387 }
2388
2389 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2390                                       int flags, float_status *status)
2391 {
2392     FloatParts64 pa, pb, pc, *pr;
2393
2394     bfloat16_unpack_canonical(&pa, a, status);
2395     bfloat16_unpack_canonical(&pb, b, status);
2396     bfloat16_unpack_canonical(&pc, c, status);
2397     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2398
2399     return bfloat16_round_pack_canonical(pr, status);
2400 }
2401
2402 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2403                                       int flags, float_status *status)
2404 {
2405     FloatParts128 pa, pb, pc, *pr;
2406
2407     float128_unpack_canonical(&pa, a, status);
2408     float128_unpack_canonical(&pb, b, status);
2409     float128_unpack_canonical(&pc, c, status);
2410     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2411
2412     return float128_round_pack_canonical(pr, status);
2413 }
2414
2415 /*
2416  * Division
2417  */
2418
2419 float16 float16_div(float16 a, float16 b, float_status *status)
2420 {
2421     FloatParts64 pa, pb, *pr;
2422
2423     float16_unpack_canonical(&pa, a, status);
2424     float16_unpack_canonical(&pb, b, status);
2425     pr = parts_div(&pa, &pb, status);
2426
2427     return float16_round_pack_canonical(pr, status);
2428 }
2429
2430 static float32 QEMU_SOFTFLOAT_ATTR
2431 soft_f32_div(float32 a, float32 b, float_status *status)
2432 {
2433     FloatParts64 pa, pb, *pr;
2434
2435     float32_unpack_canonical(&pa, a, status);
2436     float32_unpack_canonical(&pb, b, status);
2437     pr = parts_div(&pa, &pb, status);
2438
2439     return float32_round_pack_canonical(pr, status);
2440 }
2441
2442 static float64 QEMU_SOFTFLOAT_ATTR
2443 soft_f64_div(float64 a, float64 b, float_status *status)
2444 {
2445     FloatParts64 pa, pb, *pr;
2446
2447     float64_unpack_canonical(&pa, a, status);
2448     float64_unpack_canonical(&pb, b, status);
2449     pr = parts_div(&pa, &pb, status);
2450
2451     return float64_round_pack_canonical(pr, status);
2452 }
2453
2454 static float hard_f32_div(float a, float b)
2455 {
2456     return a / b;
2457 }
2458
2459 static double hard_f64_div(double a, double b)
2460 {
2461     return a / b;
2462 }
2463
2464 static bool f32_div_pre(union_float32 a, union_float32 b)
2465 {
2466     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2467         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2468                fpclassify(b.h) == FP_NORMAL;
2469     }
2470     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2471 }
2472
2473 static bool f64_div_pre(union_float64 a, union_float64 b)
2474 {
2475     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2476         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2477                fpclassify(b.h) == FP_NORMAL;
2478     }
2479     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2480 }
2481
2482 static bool f32_div_post(union_float32 a, union_float32 b)
2483 {
2484     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2485         return fpclassify(a.h) != FP_ZERO;
2486     }
2487     return !float32_is_zero(a.s);
2488 }
2489
2490 static bool f64_div_post(union_float64 a, union_float64 b)
2491 {
2492     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2493         return fpclassify(a.h) != FP_ZERO;
2494     }
2495     return !float64_is_zero(a.s);
2496 }
2497
2498 float32 QEMU_FLATTEN
2499 float32_div(float32 a, float32 b, float_status *s)
2500 {
2501     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2502                         f32_div_pre, f32_div_post);
2503 }
2504
2505 float64 QEMU_FLATTEN
2506 float64_div(float64 a, float64 b, float_status *s)
2507 {
2508     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2509                         f64_div_pre, f64_div_post);
2510 }
2511
2512 float64 float64r32_div(float64 a, float64 b, float_status *status)
2513 {
2514     FloatParts64 pa, pb, *pr;
2515
2516     float64_unpack_canonical(&pa, a, status);
2517     float64_unpack_canonical(&pb, b, status);
2518     pr = parts_div(&pa, &pb, status);
2519
2520     return float64r32_round_pack_canonical(pr, status);
2521 }
2522
2523 bfloat16 QEMU_FLATTEN
2524 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2525 {
2526     FloatParts64 pa, pb, *pr;
2527
2528     bfloat16_unpack_canonical(&pa, a, status);
2529     bfloat16_unpack_canonical(&pb, b, status);
2530     pr = parts_div(&pa, &pb, status);
2531
2532     return bfloat16_round_pack_canonical(pr, status);
2533 }
2534
2535 float128 QEMU_FLATTEN
2536 float128_div(float128 a, float128 b, float_status *status)
2537 {
2538     FloatParts128 pa, pb, *pr;
2539
2540     float128_unpack_canonical(&pa, a, status);
2541     float128_unpack_canonical(&pb, b, status);
2542     pr = parts_div(&pa, &pb, status);
2543
2544     return float128_round_pack_canonical(pr, status);
2545 }
2546
2547 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2548 {
2549     FloatParts128 pa, pb, *pr;
2550
2551     if (!floatx80_unpack_canonical(&pa, a, status) ||
2552         !floatx80_unpack_canonical(&pb, b, status)) {
2553         return floatx80_default_nan(status);
2554     }
2555
2556     pr = parts_div(&pa, &pb, status);
2557     return floatx80_round_pack_canonical(pr, status);
2558 }
2559
2560 /*
2561  * Remainder
2562  */
2563
2564 float32 float32_rem(float32 a, float32 b, float_status *status)
2565 {
2566     FloatParts64 pa, pb, *pr;
2567
2568     float32_unpack_canonical(&pa, a, status);
2569     float32_unpack_canonical(&pb, b, status);
2570     pr = parts_modrem(&pa, &pb, NULL, status);
2571
2572     return float32_round_pack_canonical(pr, status);
2573 }
2574
2575 float64 float64_rem(float64 a, float64 b, float_status *status)
2576 {
2577     FloatParts64 pa, pb, *pr;
2578
2579     float64_unpack_canonical(&pa, a, status);
2580     float64_unpack_canonical(&pb, b, status);
2581     pr = parts_modrem(&pa, &pb, NULL, status);
2582
2583     return float64_round_pack_canonical(pr, status);
2584 }
2585
2586 float128 float128_rem(float128 a, float128 b, float_status *status)
2587 {
2588     FloatParts128 pa, pb, *pr;
2589
2590     float128_unpack_canonical(&pa, a, status);
2591     float128_unpack_canonical(&pb, b, status);
2592     pr = parts_modrem(&pa, &pb, NULL, status);
2593
2594     return float128_round_pack_canonical(pr, status);
2595 }
2596
2597 /*
2598  * Returns the remainder of the extended double-precision floating-point value
2599  * `a' with respect to the corresponding value `b'.
2600  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2601  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2602  * the remainder based on truncating the quotient toward zero instead and
2603  * *quotient is set to the low 64 bits of the absolute value of the integer
2604  * quotient.
2605  */
2606 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2607                          uint64_t *quotient, float_status *status)
2608 {
2609     FloatParts128 pa, pb, *pr;
2610
2611     *quotient = 0;
2612     if (!floatx80_unpack_canonical(&pa, a, status) ||
2613         !floatx80_unpack_canonical(&pb, b, status)) {
2614         return floatx80_default_nan(status);
2615     }
2616     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2617
2618     return floatx80_round_pack_canonical(pr, status);
2619 }
2620
2621 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2622 {
2623     uint64_t quotient;
2624     return floatx80_modrem(a, b, false, &quotient, status);
2625 }
2626
2627 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2628 {
2629     uint64_t quotient;
2630     return floatx80_modrem(a, b, true, &quotient, status);
2631 }
2632
2633 /*
2634  * Float to Float conversions
2635  *
2636  * Returns the result of converting one float format to another. The
2637  * conversion is performed according to the IEC/IEEE Standard for
2638  * Binary Floating-Point Arithmetic.
2639  *
2640  * Usually this only needs to take care of raising invalid exceptions
2641  * and handling the conversion on NaNs.
2642  */
2643
2644 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2645 {
2646     switch (a->cls) {
2647     case float_class_snan:
2648         float_raise(float_flag_invalid_snan, s);
2649         /* fall through */
2650     case float_class_qnan:
2651         /*
2652          * There is no NaN in the destination format.  Raise Invalid
2653          * and return a zero with the sign of the input NaN.
2654          */
2655         float_raise(float_flag_invalid, s);
2656         a->cls = float_class_zero;
2657         break;
2658
2659     case float_class_inf:
2660         /*
2661          * There is no Inf in the destination format.  Raise Invalid
2662          * and return the maximum normal with the correct sign.
2663          */
2664         float_raise(float_flag_invalid, s);
2665         a->cls = float_class_normal;
2666         a->exp = float16_params_ahp.exp_max;
2667         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2668                                   float16_params_ahp.frac_size + 1);
2669         break;
2670
2671     case float_class_normal:
2672     case float_class_zero:
2673         break;
2674
2675     default:
2676         g_assert_not_reached();
2677     }
2678 }
2679
2680 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2681 {
2682     if (is_nan(a->cls)) {
2683         parts_return_nan(a, s);
2684     }
2685 }
2686
2687 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2688 {
2689     if (is_nan(a->cls)) {
2690         parts_return_nan(a, s);
2691     }
2692 }
2693
2694 #define parts_float_to_float(P, S) \
2695     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2696
2697 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2698                                         float_status *s)
2699 {
2700     a->cls = b->cls;
2701     a->sign = b->sign;
2702     a->exp = b->exp;
2703
2704     if (a->cls == float_class_normal) {
2705         frac_truncjam(a, b);
2706     } else if (is_nan(a->cls)) {
2707         /* Discard the low bits of the NaN. */
2708         a->frac = b->frac_hi;
2709         parts_return_nan(a, s);
2710     }
2711 }
2712
2713 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2714                                        float_status *s)
2715 {
2716     a->cls = b->cls;
2717     a->sign = b->sign;
2718     a->exp = b->exp;
2719     frac_widen(a, b);
2720
2721     if (is_nan(a->cls)) {
2722         parts_return_nan(a, s);
2723     }
2724 }
2725
2726 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2727 {
2728     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2729     FloatParts64 p;
2730
2731     float16a_unpack_canonical(&p, a, s, fmt16);
2732     parts_float_to_float(&p, s);
2733     return float32_round_pack_canonical(&p, s);
2734 }
2735
2736 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2737 {
2738     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2739     FloatParts64 p;
2740
2741     float16a_unpack_canonical(&p, a, s, fmt16);
2742     parts_float_to_float(&p, s);
2743     return float64_round_pack_canonical(&p, s);
2744 }
2745
2746 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2747 {
2748     FloatParts64 p;
2749     const FloatFmt *fmt;
2750
2751     float32_unpack_canonical(&p, a, s);
2752     if (ieee) {
2753         parts_float_to_float(&p, s);
2754         fmt = &float16_params;
2755     } else {
2756         parts_float_to_ahp(&p, s);
2757         fmt = &float16_params_ahp;
2758     }
2759     return float16a_round_pack_canonical(&p, s, fmt);
2760 }
2761
2762 static float64 QEMU_SOFTFLOAT_ATTR
2763 soft_float32_to_float64(float32 a, float_status *s)
2764 {
2765     FloatParts64 p;
2766
2767     float32_unpack_canonical(&p, a, s);
2768     parts_float_to_float(&p, s);
2769     return float64_round_pack_canonical(&p, s);
2770 }
2771
2772 float64 float32_to_float64(float32 a, float_status *s)
2773 {
2774     if (likely(float32_is_normal(a))) {
2775         /* Widening conversion can never produce inexact results.  */
2776         union_float32 uf;
2777         union_float64 ud;
2778         uf.s = a;
2779         ud.h = uf.h;
2780         return ud.s;
2781     } else if (float32_is_zero(a)) {
2782         return float64_set_sign(float64_zero, float32_is_neg(a));
2783     } else {
2784         return soft_float32_to_float64(a, s);
2785     }
2786 }
2787
2788 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2789 {
2790     FloatParts64 p;
2791     const FloatFmt *fmt;
2792
2793     float64_unpack_canonical(&p, a, s);
2794     if (ieee) {
2795         parts_float_to_float(&p, s);
2796         fmt = &float16_params;
2797     } else {
2798         parts_float_to_ahp(&p, s);
2799         fmt = &float16_params_ahp;
2800     }
2801     return float16a_round_pack_canonical(&p, s, fmt);
2802 }
2803
2804 float32 float64_to_float32(float64 a, float_status *s)
2805 {
2806     FloatParts64 p;
2807
2808     float64_unpack_canonical(&p, a, s);
2809     parts_float_to_float(&p, s);
2810     return float32_round_pack_canonical(&p, s);
2811 }
2812
2813 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2814 {
2815     FloatParts64 p;
2816
2817     bfloat16_unpack_canonical(&p, a, s);
2818     parts_float_to_float(&p, s);
2819     return float32_round_pack_canonical(&p, s);
2820 }
2821
2822 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2823 {
2824     FloatParts64 p;
2825
2826     bfloat16_unpack_canonical(&p, a, s);
2827     parts_float_to_float(&p, s);
2828     return float64_round_pack_canonical(&p, s);
2829 }
2830
2831 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2832 {
2833     FloatParts64 p;
2834
2835     float32_unpack_canonical(&p, a, s);
2836     parts_float_to_float(&p, s);
2837     return bfloat16_round_pack_canonical(&p, s);
2838 }
2839
2840 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2841 {
2842     FloatParts64 p;
2843
2844     float64_unpack_canonical(&p, a, s);
2845     parts_float_to_float(&p, s);
2846     return bfloat16_round_pack_canonical(&p, s);
2847 }
2848
2849 float32 float128_to_float32(float128 a, float_status *s)
2850 {
2851     FloatParts64 p64;
2852     FloatParts128 p128;
2853
2854     float128_unpack_canonical(&p128, a, s);
2855     parts_float_to_float_narrow(&p64, &p128, s);
2856     return float32_round_pack_canonical(&p64, s);
2857 }
2858
2859 float64 float128_to_float64(float128 a, float_status *s)
2860 {
2861     FloatParts64 p64;
2862     FloatParts128 p128;
2863
2864     float128_unpack_canonical(&p128, a, s);
2865     parts_float_to_float_narrow(&p64, &p128, s);
2866     return float64_round_pack_canonical(&p64, s);
2867 }
2868
2869 float128 float32_to_float128(float32 a, float_status *s)
2870 {
2871     FloatParts64 p64;
2872     FloatParts128 p128;
2873
2874     float32_unpack_canonical(&p64, a, s);
2875     parts_float_to_float_widen(&p128, &p64, s);
2876     return float128_round_pack_canonical(&p128, s);
2877 }
2878
2879 float128 float64_to_float128(float64 a, float_status *s)
2880 {
2881     FloatParts64 p64;
2882     FloatParts128 p128;
2883
2884     float64_unpack_canonical(&p64, a, s);
2885     parts_float_to_float_widen(&p128, &p64, s);
2886     return float128_round_pack_canonical(&p128, s);
2887 }
2888
2889 float32 floatx80_to_float32(floatx80 a, float_status *s)
2890 {
2891     FloatParts64 p64;
2892     FloatParts128 p128;
2893
2894     if (floatx80_unpack_canonical(&p128, a, s)) {
2895         parts_float_to_float_narrow(&p64, &p128, s);
2896     } else {
2897         parts_default_nan(&p64, s);
2898     }
2899     return float32_round_pack_canonical(&p64, s);
2900 }
2901
2902 float64 floatx80_to_float64(floatx80 a, float_status *s)
2903 {
2904     FloatParts64 p64;
2905     FloatParts128 p128;
2906
2907     if (floatx80_unpack_canonical(&p128, a, s)) {
2908         parts_float_to_float_narrow(&p64, &p128, s);
2909     } else {
2910         parts_default_nan(&p64, s);
2911     }
2912     return float64_round_pack_canonical(&p64, s);
2913 }
2914
2915 float128 floatx80_to_float128(floatx80 a, float_status *s)
2916 {
2917     FloatParts128 p;
2918
2919     if (floatx80_unpack_canonical(&p, a, s)) {
2920         parts_float_to_float(&p, s);
2921     } else {
2922         parts_default_nan(&p, s);
2923     }
2924     return float128_round_pack_canonical(&p, s);
2925 }
2926
2927 floatx80 float32_to_floatx80(float32 a, float_status *s)
2928 {
2929     FloatParts64 p64;
2930     FloatParts128 p128;
2931
2932     float32_unpack_canonical(&p64, a, s);
2933     parts_float_to_float_widen(&p128, &p64, s);
2934     return floatx80_round_pack_canonical(&p128, s);
2935 }
2936
2937 floatx80 float64_to_floatx80(float64 a, float_status *s)
2938 {
2939     FloatParts64 p64;
2940     FloatParts128 p128;
2941
2942     float64_unpack_canonical(&p64, a, s);
2943     parts_float_to_float_widen(&p128, &p64, s);
2944     return floatx80_round_pack_canonical(&p128, s);
2945 }
2946
2947 floatx80 float128_to_floatx80(float128 a, float_status *s)
2948 {
2949     FloatParts128 p;
2950
2951     float128_unpack_canonical(&p, a, s);
2952     parts_float_to_float(&p, s);
2953     return floatx80_round_pack_canonical(&p, s);
2954 }
2955
2956 /*
2957  * Round to integral value
2958  */
2959
2960 float16 float16_round_to_int(float16 a, float_status *s)
2961 {
2962     FloatParts64 p;
2963
2964     float16_unpack_canonical(&p, a, s);
2965     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2966     return float16_round_pack_canonical(&p, s);
2967 }
2968
2969 float32 float32_round_to_int(float32 a, float_status *s)
2970 {
2971     FloatParts64 p;
2972
2973     float32_unpack_canonical(&p, a, s);
2974     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2975     return float32_round_pack_canonical(&p, s);
2976 }
2977
2978 float64 float64_round_to_int(float64 a, float_status *s)
2979 {
2980     FloatParts64 p;
2981
2982     float64_unpack_canonical(&p, a, s);
2983     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2984     return float64_round_pack_canonical(&p, s);
2985 }
2986
2987 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2988 {
2989     FloatParts64 p;
2990
2991     bfloat16_unpack_canonical(&p, a, s);
2992     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2993     return bfloat16_round_pack_canonical(&p, s);
2994 }
2995
2996 float128 float128_round_to_int(float128 a, float_status *s)
2997 {
2998     FloatParts128 p;
2999
3000     float128_unpack_canonical(&p, a, s);
3001     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3002     return float128_round_pack_canonical(&p, s);
3003 }
3004
3005 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3006 {
3007     FloatParts128 p;
3008
3009     if (!floatx80_unpack_canonical(&p, a, status)) {
3010         return floatx80_default_nan(status);
3011     }
3012
3013     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3014                        &floatx80_params[status->floatx80_rounding_precision]);
3015     return floatx80_round_pack_canonical(&p, status);
3016 }
3017
3018 /*
3019  * Floating-point to signed integer conversions
3020  */
3021
3022 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3023                               float_status *s)
3024 {
3025     FloatParts64 p;
3026
3027     float16_unpack_canonical(&p, a, s);
3028     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3029 }
3030
3031 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3032                                 float_status *s)
3033 {
3034     FloatParts64 p;
3035
3036     float16_unpack_canonical(&p, a, s);
3037     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3038 }
3039
3040 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3041                                 float_status *s)
3042 {
3043     FloatParts64 p;
3044
3045     float16_unpack_canonical(&p, a, s);
3046     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3047 }
3048
3049 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3050                                 float_status *s)
3051 {
3052     FloatParts64 p;
3053
3054     float16_unpack_canonical(&p, a, s);
3055     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3056 }
3057
3058 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3059                                 float_status *s)
3060 {
3061     FloatParts64 p;
3062
3063     float32_unpack_canonical(&p, a, s);
3064     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3065 }
3066
3067 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3068                                 float_status *s)
3069 {
3070     FloatParts64 p;
3071
3072     float32_unpack_canonical(&p, a, s);
3073     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3074 }
3075
3076 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3077                                 float_status *s)
3078 {
3079     FloatParts64 p;
3080
3081     float32_unpack_canonical(&p, a, s);
3082     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3083 }
3084
3085 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3086                                 float_status *s)
3087 {
3088     FloatParts64 p;
3089
3090     float64_unpack_canonical(&p, a, s);
3091     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3092 }
3093
3094 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3095                                 float_status *s)
3096 {
3097     FloatParts64 p;
3098
3099     float64_unpack_canonical(&p, a, s);
3100     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3101 }
3102
3103 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3104                                 float_status *s)
3105 {
3106     FloatParts64 p;
3107
3108     float64_unpack_canonical(&p, a, s);
3109     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3110 }
3111
3112 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3113                                  float_status *s)
3114 {
3115     FloatParts64 p;
3116
3117     bfloat16_unpack_canonical(&p, a, s);
3118     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3119 }
3120
3121 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3122                                  float_status *s)
3123 {
3124     FloatParts64 p;
3125
3126     bfloat16_unpack_canonical(&p, a, s);
3127     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3128 }
3129
3130 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3131                                  float_status *s)
3132 {
3133     FloatParts64 p;
3134
3135     bfloat16_unpack_canonical(&p, a, s);
3136     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3137 }
3138
3139 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3140                                         int scale, float_status *s)
3141 {
3142     FloatParts128 p;
3143
3144     float128_unpack_canonical(&p, a, s);
3145     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3146 }
3147
3148 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3149                                         int scale, float_status *s)
3150 {
3151     FloatParts128 p;
3152
3153     float128_unpack_canonical(&p, a, s);
3154     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3155 }
3156
3157 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3158                                         int scale, float_status *s)
3159 {
3160     int flags = 0;
3161     Int128 r;
3162     FloatParts128 p;
3163
3164     float128_unpack_canonical(&p, a, s);
3165
3166     switch (p.cls) {
3167     case float_class_snan:
3168         flags |= float_flag_invalid_snan;
3169         /* fall through */
3170     case float_class_qnan:
3171         flags |= float_flag_invalid;
3172         r = UINT128_MAX;
3173         break;
3174
3175     case float_class_inf:
3176         flags = float_flag_invalid | float_flag_invalid_cvti;
3177         r = p.sign ? INT128_MIN : INT128_MAX;
3178         break;
3179
3180     case float_class_zero:
3181         return int128_zero();
3182
3183     case float_class_normal:
3184         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3185             flags = float_flag_inexact;
3186         }
3187
3188         if (p.exp < 127) {
3189             int shift = 127 - p.exp;
3190             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3191             if (p.sign) {
3192                 r = int128_neg(r);
3193             }
3194         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3195                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3196             r = INT128_MIN;
3197         } else {
3198             flags = float_flag_invalid | float_flag_invalid_cvti;
3199             r = p.sign ? INT128_MIN : INT128_MAX;
3200         }
3201         break;
3202
3203     default:
3204         g_assert_not_reached();
3205     }
3206
3207     float_raise(flags, s);
3208     return r;
3209 }
3210
3211 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3212                                         int scale, float_status *s)
3213 {
3214     FloatParts128 p;
3215
3216     if (!floatx80_unpack_canonical(&p, a, s)) {
3217         parts_default_nan(&p, s);
3218     }
3219     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3220 }
3221
3222 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3223                                         int scale, float_status *s)
3224 {
3225     FloatParts128 p;
3226
3227     if (!floatx80_unpack_canonical(&p, a, s)) {
3228         parts_default_nan(&p, s);
3229     }
3230     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3231 }
3232
3233 int8_t float16_to_int8(float16 a, float_status *s)
3234 {
3235     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3236 }
3237
3238 int16_t float16_to_int16(float16 a, float_status *s)
3239 {
3240     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3241 }
3242
3243 int32_t float16_to_int32(float16 a, float_status *s)
3244 {
3245     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3246 }
3247
3248 int64_t float16_to_int64(float16 a, float_status *s)
3249 {
3250     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3251 }
3252
3253 int16_t float32_to_int16(float32 a, float_status *s)
3254 {
3255     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3256 }
3257
3258 int32_t float32_to_int32(float32 a, float_status *s)
3259 {
3260     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3261 }
3262
3263 int64_t float32_to_int64(float32 a, float_status *s)
3264 {
3265     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3266 }
3267
3268 int16_t float64_to_int16(float64 a, float_status *s)
3269 {
3270     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3271 }
3272
3273 int32_t float64_to_int32(float64 a, float_status *s)
3274 {
3275     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3276 }
3277
3278 int64_t float64_to_int64(float64 a, float_status *s)
3279 {
3280     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3281 }
3282
3283 int32_t float128_to_int32(float128 a, float_status *s)
3284 {
3285     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3286 }
3287
3288 int64_t float128_to_int64(float128 a, float_status *s)
3289 {
3290     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3291 }
3292
3293 Int128 float128_to_int128(float128 a, float_status *s)
3294 {
3295     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3296 }
3297
3298 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3299 {
3300     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3301 }
3302
3303 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3304 {
3305     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3306 }
3307
3308 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3309 {
3310     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3311 }
3312
3313 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3314 {
3315     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3316 }
3317
3318 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3319 {
3320     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3321 }
3322
3323 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3324 {
3325     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3326 }
3327
3328 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3329 {
3330     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3331 }
3332
3333 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3334 {
3335     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3336 }
3337
3338 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3339 {
3340     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3341 }
3342
3343 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3344 {
3345     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3346 }
3347
3348 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3349 {
3350     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3351 }
3352
3353 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3354 {
3355     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3356 }
3357
3358 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3359 {
3360     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3361 }
3362
3363 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3364 {
3365     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3366 }
3367
3368 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3369 {
3370     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3371 }
3372
3373 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3374 {
3375     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3376 }
3377
3378 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3379 {
3380     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3381 }
3382
3383 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3384 {
3385     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3386 }
3387
3388 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3389 {
3390     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3391 }
3392
3393 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3394 {
3395     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3396 }
3397
3398 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3399 {
3400     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3401 }
3402
3403 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3404 {
3405     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3406 }
3407
3408 /*
3409  * Floating-point to unsigned integer conversions
3410  */
3411
3412 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3413                                 float_status *s)
3414 {
3415     FloatParts64 p;
3416
3417     float16_unpack_canonical(&p, a, s);
3418     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3419 }
3420
3421 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3422                                   float_status *s)
3423 {
3424     FloatParts64 p;
3425
3426     float16_unpack_canonical(&p, a, s);
3427     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3428 }
3429
3430 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3431                                   float_status *s)
3432 {
3433     FloatParts64 p;
3434
3435     float16_unpack_canonical(&p, a, s);
3436     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3437 }
3438
3439 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3440                                   float_status *s)
3441 {
3442     FloatParts64 p;
3443
3444     float16_unpack_canonical(&p, a, s);
3445     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3446 }
3447
3448 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3449                                   float_status *s)
3450 {
3451     FloatParts64 p;
3452
3453     float32_unpack_canonical(&p, a, s);
3454     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3455 }
3456
3457 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3458                                   float_status *s)
3459 {
3460     FloatParts64 p;
3461
3462     float32_unpack_canonical(&p, a, s);
3463     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3464 }
3465
3466 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3467                                   float_status *s)
3468 {
3469     FloatParts64 p;
3470
3471     float32_unpack_canonical(&p, a, s);
3472     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3473 }
3474
3475 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3476                                   float_status *s)
3477 {
3478     FloatParts64 p;
3479
3480     float64_unpack_canonical(&p, a, s);
3481     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3482 }
3483
3484 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3485                                   float_status *s)
3486 {
3487     FloatParts64 p;
3488
3489     float64_unpack_canonical(&p, a, s);
3490     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3491 }
3492
3493 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3494                                   float_status *s)
3495 {
3496     FloatParts64 p;
3497
3498     float64_unpack_canonical(&p, a, s);
3499     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3500 }
3501
3502 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3503                                    int scale, float_status *s)
3504 {
3505     FloatParts64 p;
3506
3507     bfloat16_unpack_canonical(&p, a, s);
3508     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3509 }
3510
3511 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3512                                    int scale, float_status *s)
3513 {
3514     FloatParts64 p;
3515
3516     bfloat16_unpack_canonical(&p, a, s);
3517     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3518 }
3519
3520 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3521                                    int scale, float_status *s)
3522 {
3523     FloatParts64 p;
3524
3525     bfloat16_unpack_canonical(&p, a, s);
3526     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3527 }
3528
3529 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3530                                           int scale, float_status *s)
3531 {
3532     FloatParts128 p;
3533
3534     float128_unpack_canonical(&p, a, s);
3535     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3536 }
3537
3538 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3539                                           int scale, float_status *s)
3540 {
3541     FloatParts128 p;
3542
3543     float128_unpack_canonical(&p, a, s);
3544     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3545 }
3546
3547 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3548                                          int scale, float_status *s)
3549 {
3550     int flags = 0;
3551     Int128 r;
3552     FloatParts128 p;
3553
3554     float128_unpack_canonical(&p, a, s);
3555
3556     switch (p.cls) {
3557     case float_class_snan:
3558         flags |= float_flag_invalid_snan;
3559         /* fall through */
3560     case float_class_qnan:
3561         flags |= float_flag_invalid;
3562         r = UINT128_MAX;
3563         break;
3564
3565     case float_class_inf:
3566         flags = float_flag_invalid | float_flag_invalid_cvti;
3567         r = p.sign ? int128_zero() : UINT128_MAX;
3568         break;
3569
3570     case float_class_zero:
3571         return int128_zero();
3572
3573     case float_class_normal:
3574         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3575             flags = float_flag_inexact;
3576             if (p.cls == float_class_zero) {
3577                 r = int128_zero();
3578                 break;
3579             }
3580         }
3581
3582         if (p.sign) {
3583             flags = float_flag_invalid | float_flag_invalid_cvti;
3584             r = int128_zero();
3585         } else if (p.exp <= 127) {
3586             int shift = 127 - p.exp;
3587             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3588         } else {
3589             flags = float_flag_invalid | float_flag_invalid_cvti;
3590             r = UINT128_MAX;
3591         }
3592         break;
3593
3594     default:
3595         g_assert_not_reached();
3596     }
3597
3598     float_raise(flags, s);
3599     return r;
3600 }
3601
3602 uint8_t float16_to_uint8(float16 a, float_status *s)
3603 {
3604     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3605 }
3606
3607 uint16_t float16_to_uint16(float16 a, float_status *s)
3608 {
3609     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3610 }
3611
3612 uint32_t float16_to_uint32(float16 a, float_status *s)
3613 {
3614     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3615 }
3616
3617 uint64_t float16_to_uint64(float16 a, float_status *s)
3618 {
3619     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3620 }
3621
3622 uint16_t float32_to_uint16(float32 a, float_status *s)
3623 {
3624     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3625 }
3626
3627 uint32_t float32_to_uint32(float32 a, float_status *s)
3628 {
3629     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3630 }
3631
3632 uint64_t float32_to_uint64(float32 a, float_status *s)
3633 {
3634     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3635 }
3636
3637 uint16_t float64_to_uint16(float64 a, float_status *s)
3638 {
3639     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3640 }
3641
3642 uint32_t float64_to_uint32(float64 a, float_status *s)
3643 {
3644     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3645 }
3646
3647 uint64_t float64_to_uint64(float64 a, float_status *s)
3648 {
3649     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3650 }
3651
3652 uint32_t float128_to_uint32(float128 a, float_status *s)
3653 {
3654     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3655 }
3656
3657 uint64_t float128_to_uint64(float128 a, float_status *s)
3658 {
3659     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3660 }
3661
3662 Int128 float128_to_uint128(float128 a, float_status *s)
3663 {
3664     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3665 }
3666
3667 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3668 {
3669     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3670 }
3671
3672 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3673 {
3674     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3675 }
3676
3677 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3678 {
3679     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3680 }
3681
3682 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3683 {
3684     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3685 }
3686
3687 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3688 {
3689     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3690 }
3691
3692 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3693 {
3694     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3695 }
3696
3697 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3698 {
3699     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3700 }
3701
3702 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3703 {
3704     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3705 }
3706
3707 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3708 {
3709     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3710 }
3711
3712 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3713 {
3714     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3715 }
3716
3717 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3718 {
3719     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3720 }
3721
3722 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3723 {
3724     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3725 }
3726
3727 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3728 {
3729     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3730 }
3731
3732 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3733 {
3734     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3735 }
3736
3737 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3738 {
3739     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3740 }
3741
3742 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3743 {
3744     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3745 }
3746
3747 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3748 {
3749     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3750 }
3751
3752 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3753 {
3754     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3755 }
3756
3757 /*
3758  * Signed integer to floating-point conversions
3759  */
3760
3761 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3762 {
3763     FloatParts64 p;
3764
3765     parts_sint_to_float(&p, a, scale, status);
3766     return float16_round_pack_canonical(&p, status);
3767 }
3768
3769 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3770 {
3771     return int64_to_float16_scalbn(a, scale, status);
3772 }
3773
3774 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3775 {
3776     return int64_to_float16_scalbn(a, scale, status);
3777 }
3778
3779 float16 int64_to_float16(int64_t a, float_status *status)
3780 {
3781     return int64_to_float16_scalbn(a, 0, status);
3782 }
3783
3784 float16 int32_to_float16(int32_t a, float_status *status)
3785 {
3786     return int64_to_float16_scalbn(a, 0, status);
3787 }
3788
3789 float16 int16_to_float16(int16_t a, float_status *status)
3790 {
3791     return int64_to_float16_scalbn(a, 0, status);
3792 }
3793
3794 float16 int8_to_float16(int8_t a, float_status *status)
3795 {
3796     return int64_to_float16_scalbn(a, 0, status);
3797 }
3798
3799 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3800 {
3801     FloatParts64 p;
3802
3803     /* Without scaling, there are no overflow concerns. */
3804     if (likely(scale == 0) && can_use_fpu(status)) {
3805         union_float32 ur;
3806         ur.h = a;
3807         return ur.s;
3808     }
3809
3810     parts64_sint_to_float(&p, a, scale, status);
3811     return float32_round_pack_canonical(&p, status);
3812 }
3813
3814 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3815 {
3816     return int64_to_float32_scalbn(a, scale, status);
3817 }
3818
3819 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3820 {
3821     return int64_to_float32_scalbn(a, scale, status);
3822 }
3823
3824 float32 int64_to_float32(int64_t a, float_status *status)
3825 {
3826     return int64_to_float32_scalbn(a, 0, status);
3827 }
3828
3829 float32 int32_to_float32(int32_t a, float_status *status)
3830 {
3831     return int64_to_float32_scalbn(a, 0, status);
3832 }
3833
3834 float32 int16_to_float32(int16_t a, float_status *status)
3835 {
3836     return int64_to_float32_scalbn(a, 0, status);
3837 }
3838
3839 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3840 {
3841     FloatParts64 p;
3842
3843     /* Without scaling, there are no overflow concerns. */
3844     if (likely(scale == 0) && can_use_fpu(status)) {
3845         union_float64 ur;
3846         ur.h = a;
3847         return ur.s;
3848     }
3849
3850     parts_sint_to_float(&p, a, scale, status);
3851     return float64_round_pack_canonical(&p, status);
3852 }
3853
3854 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3855 {
3856     return int64_to_float64_scalbn(a, scale, status);
3857 }
3858
3859 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3860 {
3861     return int64_to_float64_scalbn(a, scale, status);
3862 }
3863
3864 float64 int64_to_float64(int64_t a, float_status *status)
3865 {
3866     return int64_to_float64_scalbn(a, 0, status);
3867 }
3868
3869 float64 int32_to_float64(int32_t a, float_status *status)
3870 {
3871     return int64_to_float64_scalbn(a, 0, status);
3872 }
3873
3874 float64 int16_to_float64(int16_t a, float_status *status)
3875 {
3876     return int64_to_float64_scalbn(a, 0, status);
3877 }
3878
3879 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3880 {
3881     FloatParts64 p;
3882
3883     parts_sint_to_float(&p, a, scale, status);
3884     return bfloat16_round_pack_canonical(&p, status);
3885 }
3886
3887 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3888 {
3889     return int64_to_bfloat16_scalbn(a, scale, status);
3890 }
3891
3892 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3893 {
3894     return int64_to_bfloat16_scalbn(a, scale, status);
3895 }
3896
3897 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3898 {
3899     return int64_to_bfloat16_scalbn(a, 0, status);
3900 }
3901
3902 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3903 {
3904     return int64_to_bfloat16_scalbn(a, 0, status);
3905 }
3906
3907 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3908 {
3909     return int64_to_bfloat16_scalbn(a, 0, status);
3910 }
3911
3912 float128 int128_to_float128(Int128 a, float_status *status)
3913 {
3914     FloatParts128 p = { };
3915     int shift;
3916
3917     if (int128_nz(a)) {
3918         p.cls = float_class_normal;
3919         if (!int128_nonneg(a)) {
3920             p.sign = true;
3921             a = int128_neg(a);
3922         }
3923
3924         shift = clz64(int128_gethi(a));
3925         if (shift == 64) {
3926             shift += clz64(int128_getlo(a));
3927         }
3928
3929         p.exp = 127 - shift;
3930         a = int128_lshift(a, shift);
3931
3932         p.frac_hi = int128_gethi(a);
3933         p.frac_lo = int128_getlo(a);
3934     } else {
3935         p.cls = float_class_zero;
3936     }
3937
3938     return float128_round_pack_canonical(&p, status);
3939 }
3940
3941 float128 int64_to_float128(int64_t a, float_status *status)
3942 {
3943     FloatParts128 p;
3944
3945     parts_sint_to_float(&p, a, 0, status);
3946     return float128_round_pack_canonical(&p, status);
3947 }
3948
3949 float128 int32_to_float128(int32_t a, float_status *status)
3950 {
3951     return int64_to_float128(a, status);
3952 }
3953
3954 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3955 {
3956     FloatParts128 p;
3957
3958     parts_sint_to_float(&p, a, 0, status);
3959     return floatx80_round_pack_canonical(&p, status);
3960 }
3961
3962 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3963 {
3964     return int64_to_floatx80(a, status);
3965 }
3966
3967 /*
3968  * Unsigned Integer to floating-point conversions
3969  */
3970
3971 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3972 {
3973     FloatParts64 p;
3974
3975     parts_uint_to_float(&p, a, scale, status);
3976     return float16_round_pack_canonical(&p, status);
3977 }
3978
3979 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3980 {
3981     return uint64_to_float16_scalbn(a, scale, status);
3982 }
3983
3984 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3985 {
3986     return uint64_to_float16_scalbn(a, scale, status);
3987 }
3988
3989 float16 uint64_to_float16(uint64_t a, float_status *status)
3990 {
3991     return uint64_to_float16_scalbn(a, 0, status);
3992 }
3993
3994 float16 uint32_to_float16(uint32_t a, float_status *status)
3995 {
3996     return uint64_to_float16_scalbn(a, 0, status);
3997 }
3998
3999 float16 uint16_to_float16(uint16_t a, float_status *status)
4000 {
4001     return uint64_to_float16_scalbn(a, 0, status);
4002 }
4003
4004 float16 uint8_to_float16(uint8_t a, float_status *status)
4005 {
4006     return uint64_to_float16_scalbn(a, 0, status);
4007 }
4008
4009 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4010 {
4011     FloatParts64 p;
4012
4013     /* Without scaling, there are no overflow concerns. */
4014     if (likely(scale == 0) && can_use_fpu(status)) {
4015         union_float32 ur;
4016         ur.h = a;
4017         return ur.s;
4018     }
4019
4020     parts_uint_to_float(&p, a, scale, status);
4021     return float32_round_pack_canonical(&p, status);
4022 }
4023
4024 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4025 {
4026     return uint64_to_float32_scalbn(a, scale, status);
4027 }
4028
4029 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4030 {
4031     return uint64_to_float32_scalbn(a, scale, status);
4032 }
4033
4034 float32 uint64_to_float32(uint64_t a, float_status *status)
4035 {
4036     return uint64_to_float32_scalbn(a, 0, status);
4037 }
4038
4039 float32 uint32_to_float32(uint32_t a, float_status *status)
4040 {
4041     return uint64_to_float32_scalbn(a, 0, status);
4042 }
4043
4044 float32 uint16_to_float32(uint16_t a, float_status *status)
4045 {
4046     return uint64_to_float32_scalbn(a, 0, status);
4047 }
4048
4049 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4050 {
4051     FloatParts64 p;
4052
4053     /* Without scaling, there are no overflow concerns. */
4054     if (likely(scale == 0) && can_use_fpu(status)) {
4055         union_float64 ur;
4056         ur.h = a;
4057         return ur.s;
4058     }
4059
4060     parts_uint_to_float(&p, a, scale, status);
4061     return float64_round_pack_canonical(&p, status);
4062 }
4063
4064 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4065 {
4066     return uint64_to_float64_scalbn(a, scale, status);
4067 }
4068
4069 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4070 {
4071     return uint64_to_float64_scalbn(a, scale, status);
4072 }
4073
4074 float64 uint64_to_float64(uint64_t a, float_status *status)
4075 {
4076     return uint64_to_float64_scalbn(a, 0, status);
4077 }
4078
4079 float64 uint32_to_float64(uint32_t a, float_status *status)
4080 {
4081     return uint64_to_float64_scalbn(a, 0, status);
4082 }
4083
4084 float64 uint16_to_float64(uint16_t a, float_status *status)
4085 {
4086     return uint64_to_float64_scalbn(a, 0, status);
4087 }
4088
4089 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4090 {
4091     FloatParts64 p;
4092
4093     parts_uint_to_float(&p, a, scale, status);
4094     return bfloat16_round_pack_canonical(&p, status);
4095 }
4096
4097 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4098 {
4099     return uint64_to_bfloat16_scalbn(a, scale, status);
4100 }
4101
4102 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4103 {
4104     return uint64_to_bfloat16_scalbn(a, scale, status);
4105 }
4106
4107 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4108 {
4109     return uint64_to_bfloat16_scalbn(a, 0, status);
4110 }
4111
4112 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4113 {
4114     return uint64_to_bfloat16_scalbn(a, 0, status);
4115 }
4116
4117 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4118 {
4119     return uint64_to_bfloat16_scalbn(a, 0, status);
4120 }
4121
4122 float128 uint64_to_float128(uint64_t a, float_status *status)
4123 {
4124     FloatParts128 p;
4125
4126     parts_uint_to_float(&p, a, 0, status);
4127     return float128_round_pack_canonical(&p, status);
4128 }
4129
4130 float128 uint128_to_float128(Int128 a, float_status *status)
4131 {
4132     FloatParts128 p = { };
4133     int shift;
4134
4135     if (int128_nz(a)) {
4136         p.cls = float_class_normal;
4137
4138         shift = clz64(int128_gethi(a));
4139         if (shift == 64) {
4140             shift += clz64(int128_getlo(a));
4141         }
4142
4143         p.exp = 127 - shift;
4144         a = int128_lshift(a, shift);
4145
4146         p.frac_hi = int128_gethi(a);
4147         p.frac_lo = int128_getlo(a);
4148     } else {
4149         p.cls = float_class_zero;
4150     }
4151
4152     return float128_round_pack_canonical(&p, status);
4153 }
4154
4155 /*
4156  * Minimum and maximum
4157  */
4158
4159 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4160 {
4161     FloatParts64 pa, pb, *pr;
4162
4163     float16_unpack_canonical(&pa, a, s);
4164     float16_unpack_canonical(&pb, b, s);
4165     pr = parts_minmax(&pa, &pb, s, flags);
4166
4167     return float16_round_pack_canonical(pr, s);
4168 }
4169
4170 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4171                                 float_status *s, int flags)
4172 {
4173     FloatParts64 pa, pb, *pr;
4174
4175     bfloat16_unpack_canonical(&pa, a, s);
4176     bfloat16_unpack_canonical(&pb, b, s);
4177     pr = parts_minmax(&pa, &pb, s, flags);
4178
4179     return bfloat16_round_pack_canonical(pr, s);
4180 }
4181
4182 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4183 {
4184     FloatParts64 pa, pb, *pr;
4185
4186     float32_unpack_canonical(&pa, a, s);
4187     float32_unpack_canonical(&pb, b, s);
4188     pr = parts_minmax(&pa, &pb, s, flags);
4189
4190     return float32_round_pack_canonical(pr, s);
4191 }
4192
4193 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4194 {
4195     FloatParts64 pa, pb, *pr;
4196
4197     float64_unpack_canonical(&pa, a, s);
4198     float64_unpack_canonical(&pb, b, s);
4199     pr = parts_minmax(&pa, &pb, s, flags);
4200
4201     return float64_round_pack_canonical(pr, s);
4202 }
4203
4204 static float128 float128_minmax(float128 a, float128 b,
4205                                 float_status *s, int flags)
4206 {
4207     FloatParts128 pa, pb, *pr;
4208
4209     float128_unpack_canonical(&pa, a, s);
4210     float128_unpack_canonical(&pb, b, s);
4211     pr = parts_minmax(&pa, &pb, s, flags);
4212
4213     return float128_round_pack_canonical(pr, s);
4214 }
4215
4216 #define MINMAX_1(type, name, flags) \
4217     type type##_##name(type a, type b, float_status *s) \
4218     { return type##_minmax(a, b, s, flags); }
4219
4220 #define MINMAX_2(type) \
4221     MINMAX_1(type, max, 0)                                                \
4222     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4223     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4224     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4225     MINMAX_1(type, min, minmax_ismin)                                     \
4226     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4227     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4228     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4229
4230 MINMAX_2(float16)
4231 MINMAX_2(bfloat16)
4232 MINMAX_2(float32)
4233 MINMAX_2(float64)
4234 MINMAX_2(float128)
4235
4236 #undef MINMAX_1
4237 #undef MINMAX_2
4238
4239 /*
4240  * Floating point compare
4241  */
4242
4243 static FloatRelation QEMU_FLATTEN
4244 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4245 {
4246     FloatParts64 pa, pb;
4247
4248     float16_unpack_canonical(&pa, a, s);
4249     float16_unpack_canonical(&pb, b, s);
4250     return parts_compare(&pa, &pb, s, is_quiet);
4251 }
4252
4253 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4254 {
4255     return float16_do_compare(a, b, s, false);
4256 }
4257
4258 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4259 {
4260     return float16_do_compare(a, b, s, true);
4261 }
4262
4263 static FloatRelation QEMU_SOFTFLOAT_ATTR
4264 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4265 {
4266     FloatParts64 pa, pb;
4267
4268     float32_unpack_canonical(&pa, a, s);
4269     float32_unpack_canonical(&pb, b, s);
4270     return parts_compare(&pa, &pb, s, is_quiet);
4271 }
4272
4273 static FloatRelation QEMU_FLATTEN
4274 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4275 {
4276     union_float32 ua, ub;
4277
4278     ua.s = xa;
4279     ub.s = xb;
4280
4281     if (QEMU_NO_HARDFLOAT) {
4282         goto soft;
4283     }
4284
4285     float32_input_flush2(&ua.s, &ub.s, s);
4286     if (isgreaterequal(ua.h, ub.h)) {
4287         if (isgreater(ua.h, ub.h)) {
4288             return float_relation_greater;
4289         }
4290         return float_relation_equal;
4291     }
4292     if (likely(isless(ua.h, ub.h))) {
4293         return float_relation_less;
4294     }
4295     /*
4296      * The only condition remaining is unordered.
4297      * Fall through to set flags.
4298      */
4299  soft:
4300     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4301 }
4302
4303 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4304 {
4305     return float32_hs_compare(a, b, s, false);
4306 }
4307
4308 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4309 {
4310     return float32_hs_compare(a, b, s, true);
4311 }
4312
4313 static FloatRelation QEMU_SOFTFLOAT_ATTR
4314 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4315 {
4316     FloatParts64 pa, pb;
4317
4318     float64_unpack_canonical(&pa, a, s);
4319     float64_unpack_canonical(&pb, b, s);
4320     return parts_compare(&pa, &pb, s, is_quiet);
4321 }
4322
4323 static FloatRelation QEMU_FLATTEN
4324 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4325 {
4326     union_float64 ua, ub;
4327
4328     ua.s = xa;
4329     ub.s = xb;
4330
4331     if (QEMU_NO_HARDFLOAT) {
4332         goto soft;
4333     }
4334
4335     float64_input_flush2(&ua.s, &ub.s, s);
4336     if (isgreaterequal(ua.h, ub.h)) {
4337         if (isgreater(ua.h, ub.h)) {
4338             return float_relation_greater;
4339         }
4340         return float_relation_equal;
4341     }
4342     if (likely(isless(ua.h, ub.h))) {
4343         return float_relation_less;
4344     }
4345     /*
4346      * The only condition remaining is unordered.
4347      * Fall through to set flags.
4348      */
4349  soft:
4350     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4351 }
4352
4353 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4354 {
4355     return float64_hs_compare(a, b, s, false);
4356 }
4357
4358 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4359 {
4360     return float64_hs_compare(a, b, s, true);
4361 }
4362
4363 static FloatRelation QEMU_FLATTEN
4364 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4365 {
4366     FloatParts64 pa, pb;
4367
4368     bfloat16_unpack_canonical(&pa, a, s);
4369     bfloat16_unpack_canonical(&pb, b, s);
4370     return parts_compare(&pa, &pb, s, is_quiet);
4371 }
4372
4373 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4374 {
4375     return bfloat16_do_compare(a, b, s, false);
4376 }
4377
4378 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4379 {
4380     return bfloat16_do_compare(a, b, s, true);
4381 }
4382
4383 static FloatRelation QEMU_FLATTEN
4384 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4385 {
4386     FloatParts128 pa, pb;
4387
4388     float128_unpack_canonical(&pa, a, s);
4389     float128_unpack_canonical(&pb, b, s);
4390     return parts_compare(&pa, &pb, s, is_quiet);
4391 }
4392
4393 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4394 {
4395     return float128_do_compare(a, b, s, false);
4396 }
4397
4398 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4399 {
4400     return float128_do_compare(a, b, s, true);
4401 }
4402
4403 static FloatRelation QEMU_FLATTEN
4404 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4405 {
4406     FloatParts128 pa, pb;
4407
4408     if (!floatx80_unpack_canonical(&pa, a, s) ||
4409         !floatx80_unpack_canonical(&pb, b, s)) {
4410         return float_relation_unordered;
4411     }
4412     return parts_compare(&pa, &pb, s, is_quiet);
4413 }
4414
4415 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4416 {
4417     return floatx80_do_compare(a, b, s, false);
4418 }
4419
4420 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4421 {
4422     return floatx80_do_compare(a, b, s, true);
4423 }
4424
4425 /*
4426  * Scale by 2**N
4427  */
4428
4429 float16 float16_scalbn(float16 a, int n, float_status *status)
4430 {
4431     FloatParts64 p;
4432
4433     float16_unpack_canonical(&p, a, status);
4434     parts_scalbn(&p, n, status);
4435     return float16_round_pack_canonical(&p, status);
4436 }
4437
4438 float32 float32_scalbn(float32 a, int n, float_status *status)
4439 {
4440     FloatParts64 p;
4441
4442     float32_unpack_canonical(&p, a, status);
4443     parts_scalbn(&p, n, status);
4444     return float32_round_pack_canonical(&p, status);
4445 }
4446
4447 float64 float64_scalbn(float64 a, int n, float_status *status)
4448 {
4449     FloatParts64 p;
4450
4451     float64_unpack_canonical(&p, a, status);
4452     parts_scalbn(&p, n, status);
4453     return float64_round_pack_canonical(&p, status);
4454 }
4455
4456 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4457 {
4458     FloatParts64 p;
4459
4460     bfloat16_unpack_canonical(&p, a, status);
4461     parts_scalbn(&p, n, status);
4462     return bfloat16_round_pack_canonical(&p, status);
4463 }
4464
4465 float128 float128_scalbn(float128 a, int n, float_status *status)
4466 {
4467     FloatParts128 p;
4468
4469     float128_unpack_canonical(&p, a, status);
4470     parts_scalbn(&p, n, status);
4471     return float128_round_pack_canonical(&p, status);
4472 }
4473
4474 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4475 {
4476     FloatParts128 p;
4477
4478     if (!floatx80_unpack_canonical(&p, a, status)) {
4479         return floatx80_default_nan(status);
4480     }
4481     parts_scalbn(&p, n, status);
4482     return floatx80_round_pack_canonical(&p, status);
4483 }
4484
4485 /*
4486  * Square Root
4487  */
4488
4489 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4490 {
4491     FloatParts64 p;
4492
4493     float16_unpack_canonical(&p, a, status);
4494     parts_sqrt(&p, status, &float16_params);
4495     return float16_round_pack_canonical(&p, status);
4496 }
4497
4498 static float32 QEMU_SOFTFLOAT_ATTR
4499 soft_f32_sqrt(float32 a, float_status *status)
4500 {
4501     FloatParts64 p;
4502
4503     float32_unpack_canonical(&p, a, status);
4504     parts_sqrt(&p, status, &float32_params);
4505     return float32_round_pack_canonical(&p, status);
4506 }
4507
4508 static float64 QEMU_SOFTFLOAT_ATTR
4509 soft_f64_sqrt(float64 a, float_status *status)
4510 {
4511     FloatParts64 p;
4512
4513     float64_unpack_canonical(&p, a, status);
4514     parts_sqrt(&p, status, &float64_params);
4515     return float64_round_pack_canonical(&p, status);
4516 }
4517
4518 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4519 {
4520     union_float32 ua, ur;
4521
4522     ua.s = xa;
4523     if (unlikely(!can_use_fpu(s))) {
4524         goto soft;
4525     }
4526
4527     float32_input_flush1(&ua.s, s);
4528     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4529         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4530                        fpclassify(ua.h) == FP_ZERO) ||
4531                      signbit(ua.h))) {
4532             goto soft;
4533         }
4534     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4535                         float32_is_neg(ua.s))) {
4536         goto soft;
4537     }
4538     ur.h = sqrtf(ua.h);
4539     return ur.s;
4540
4541  soft:
4542     return soft_f32_sqrt(ua.s, s);
4543 }
4544
4545 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4546 {
4547     union_float64 ua, ur;
4548
4549     ua.s = xa;
4550     if (unlikely(!can_use_fpu(s))) {
4551         goto soft;
4552     }
4553
4554     float64_input_flush1(&ua.s, s);
4555     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4556         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4557                        fpclassify(ua.h) == FP_ZERO) ||
4558                      signbit(ua.h))) {
4559             goto soft;
4560         }
4561     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4562                         float64_is_neg(ua.s))) {
4563         goto soft;
4564     }
4565     ur.h = sqrt(ua.h);
4566     return ur.s;
4567
4568  soft:
4569     return soft_f64_sqrt(ua.s, s);
4570 }
4571
4572 float64 float64r32_sqrt(float64 a, float_status *status)
4573 {
4574     FloatParts64 p;
4575
4576     float64_unpack_canonical(&p, a, status);
4577     parts_sqrt(&p, status, &float64_params);
4578     return float64r32_round_pack_canonical(&p, status);
4579 }
4580
4581 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4582 {
4583     FloatParts64 p;
4584
4585     bfloat16_unpack_canonical(&p, a, status);
4586     parts_sqrt(&p, status, &bfloat16_params);
4587     return bfloat16_round_pack_canonical(&p, status);
4588 }
4589
4590 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4591 {
4592     FloatParts128 p;
4593
4594     float128_unpack_canonical(&p, a, status);
4595     parts_sqrt(&p, status, &float128_params);
4596     return float128_round_pack_canonical(&p, status);
4597 }
4598
4599 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4600 {
4601     FloatParts128 p;
4602
4603     if (!floatx80_unpack_canonical(&p, a, s)) {
4604         return floatx80_default_nan(s);
4605     }
4606     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4607     return floatx80_round_pack_canonical(&p, s);
4608 }
4609
4610 /*
4611  * log2
4612  */
4613 float32 float32_log2(float32 a, float_status *status)
4614 {
4615     FloatParts64 p;
4616
4617     float32_unpack_canonical(&p, a, status);
4618     parts_log2(&p, status, &float32_params);
4619     return float32_round_pack_canonical(&p, status);
4620 }
4621
4622 float64 float64_log2(float64 a, float_status *status)
4623 {
4624     FloatParts64 p;
4625
4626     float64_unpack_canonical(&p, a, status);
4627     parts_log2(&p, status, &float64_params);
4628     return float64_round_pack_canonical(&p, status);
4629 }
4630
4631 /*----------------------------------------------------------------------------
4632 | The pattern for a default generated NaN.
4633 *----------------------------------------------------------------------------*/
4634
4635 float16 float16_default_nan(float_status *status)
4636 {
4637     FloatParts64 p;
4638
4639     parts_default_nan(&p, status);
4640     p.frac >>= float16_params.frac_shift;
4641     return float16_pack_raw(&p);
4642 }
4643
4644 float32 float32_default_nan(float_status *status)
4645 {
4646     FloatParts64 p;
4647
4648     parts_default_nan(&p, status);
4649     p.frac >>= float32_params.frac_shift;
4650     return float32_pack_raw(&p);
4651 }
4652
4653 float64 float64_default_nan(float_status *status)
4654 {
4655     FloatParts64 p;
4656
4657     parts_default_nan(&p, status);
4658     p.frac >>= float64_params.frac_shift;
4659     return float64_pack_raw(&p);
4660 }
4661
4662 float128 float128_default_nan(float_status *status)
4663 {
4664     FloatParts128 p;
4665
4666     parts_default_nan(&p, status);
4667     frac_shr(&p, float128_params.frac_shift);
4668     return float128_pack_raw(&p);
4669 }
4670
4671 bfloat16 bfloat16_default_nan(float_status *status)
4672 {
4673     FloatParts64 p;
4674
4675     parts_default_nan(&p, status);
4676     p.frac >>= bfloat16_params.frac_shift;
4677     return bfloat16_pack_raw(&p);
4678 }
4679
4680 /*----------------------------------------------------------------------------
4681 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4682 *----------------------------------------------------------------------------*/
4683
4684 float16 float16_silence_nan(float16 a, float_status *status)
4685 {
4686     FloatParts64 p;
4687
4688     float16_unpack_raw(&p, a);
4689     p.frac <<= float16_params.frac_shift;
4690     parts_silence_nan(&p, status);
4691     p.frac >>= float16_params.frac_shift;
4692     return float16_pack_raw(&p);
4693 }
4694
4695 float32 float32_silence_nan(float32 a, float_status *status)
4696 {
4697     FloatParts64 p;
4698
4699     float32_unpack_raw(&p, a);
4700     p.frac <<= float32_params.frac_shift;
4701     parts_silence_nan(&p, status);
4702     p.frac >>= float32_params.frac_shift;
4703     return float32_pack_raw(&p);
4704 }
4705
4706 float64 float64_silence_nan(float64 a, float_status *status)
4707 {
4708     FloatParts64 p;
4709
4710     float64_unpack_raw(&p, a);
4711     p.frac <<= float64_params.frac_shift;
4712     parts_silence_nan(&p, status);
4713     p.frac >>= float64_params.frac_shift;
4714     return float64_pack_raw(&p);
4715 }
4716
4717 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4718 {
4719     FloatParts64 p;
4720
4721     bfloat16_unpack_raw(&p, a);
4722     p.frac <<= bfloat16_params.frac_shift;
4723     parts_silence_nan(&p, status);
4724     p.frac >>= bfloat16_params.frac_shift;
4725     return bfloat16_pack_raw(&p);
4726 }
4727
4728 float128 float128_silence_nan(float128 a, float_status *status)
4729 {
4730     FloatParts128 p;
4731
4732     float128_unpack_raw(&p, a);
4733     frac_shl(&p, float128_params.frac_shift);
4734     parts_silence_nan(&p, status);
4735     frac_shr(&p, float128_params.frac_shift);
4736     return float128_pack_raw(&p);
4737 }
4738
4739 /*----------------------------------------------------------------------------
4740 | If `a' is denormal and we are in flush-to-zero mode then set the
4741 | input-denormal exception and return zero. Otherwise just return the value.
4742 *----------------------------------------------------------------------------*/
4743
4744 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4745 {
4746     if (p.exp == 0 && p.frac != 0) {
4747         float_raise(float_flag_input_denormal, status);
4748         return true;
4749     }
4750
4751     return false;
4752 }
4753
4754 float16 float16_squash_input_denormal(float16 a, float_status *status)
4755 {
4756     if (status->flush_inputs_to_zero) {
4757         FloatParts64 p;
4758
4759         float16_unpack_raw(&p, a);
4760         if (parts_squash_denormal(p, status)) {
4761             return float16_set_sign(float16_zero, p.sign);
4762         }
4763     }
4764     return a;
4765 }
4766
4767 float32 float32_squash_input_denormal(float32 a, float_status *status)
4768 {
4769     if (status->flush_inputs_to_zero) {
4770         FloatParts64 p;
4771
4772         float32_unpack_raw(&p, a);
4773         if (parts_squash_denormal(p, status)) {
4774             return float32_set_sign(float32_zero, p.sign);
4775         }
4776     }
4777     return a;
4778 }
4779
4780 float64 float64_squash_input_denormal(float64 a, float_status *status)
4781 {
4782     if (status->flush_inputs_to_zero) {
4783         FloatParts64 p;
4784
4785         float64_unpack_raw(&p, a);
4786         if (parts_squash_denormal(p, status)) {
4787             return float64_set_sign(float64_zero, p.sign);
4788         }
4789     }
4790     return a;
4791 }
4792
4793 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4794 {
4795     if (status->flush_inputs_to_zero) {
4796         FloatParts64 p;
4797
4798         bfloat16_unpack_raw(&p, a);
4799         if (parts_squash_denormal(p, status)) {
4800             return bfloat16_set_sign(bfloat16_zero, p.sign);
4801         }
4802     }
4803     return a;
4804 }
4805
4806 /*----------------------------------------------------------------------------
4807 | Normalizes the subnormal extended double-precision floating-point value
4808 | represented by the denormalized significand `aSig'.  The normalized exponent
4809 | and significand are stored at the locations pointed to by `zExpPtr' and
4810 | `zSigPtr', respectively.
4811 *----------------------------------------------------------------------------*/
4812
4813 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4814                                 uint64_t *zSigPtr)
4815 {
4816     int8_t shiftCount;
4817
4818     shiftCount = clz64(aSig);
4819     *zSigPtr = aSig<<shiftCount;
4820     *zExpPtr = 1 - shiftCount;
4821 }
4822
4823 /*----------------------------------------------------------------------------
4824 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4825 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4826 | and returns the proper extended double-precision floating-point value
4827 | corresponding to the abstract input.  Ordinarily, the abstract value is
4828 | rounded and packed into the extended double-precision format, with the
4829 | inexact exception raised if the abstract input cannot be represented
4830 | exactly.  However, if the abstract value is too large, the overflow and
4831 | inexact exceptions are raised and an infinity or maximal finite value is
4832 | returned.  If the abstract value is too small, the input value is rounded to
4833 | a subnormal number, and the underflow and inexact exceptions are raised if
4834 | the abstract input cannot be represented exactly as a subnormal extended
4835 | double-precision floating-point number.
4836 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4837 | the result is rounded to the same number of bits as single or double
4838 | precision, respectively.  Otherwise, the result is rounded to the full
4839 | precision of the extended double-precision format.
4840 |     The input significand must be normalized or smaller.  If the input
4841 | significand is not normalized, `zExp' must be 0; in that case, the result
4842 | returned is a subnormal number, and it must not require rounding.  The
4843 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4844 | Floating-Point Arithmetic.
4845 *----------------------------------------------------------------------------*/
4846
4847 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4848                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4849                               float_status *status)
4850 {
4851     FloatRoundMode roundingMode;
4852     bool roundNearestEven, increment, isTiny;
4853     int64_t roundIncrement, roundMask, roundBits;
4854
4855     roundingMode = status->float_rounding_mode;
4856     roundNearestEven = ( roundingMode == float_round_nearest_even );
4857     switch (roundingPrecision) {
4858     case floatx80_precision_x:
4859         goto precision80;
4860     case floatx80_precision_d:
4861         roundIncrement = UINT64_C(0x0000000000000400);
4862         roundMask = UINT64_C(0x00000000000007FF);
4863         break;
4864     case floatx80_precision_s:
4865         roundIncrement = UINT64_C(0x0000008000000000);
4866         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4867         break;
4868     default:
4869         g_assert_not_reached();
4870     }
4871     zSig0 |= ( zSig1 != 0 );
4872     switch (roundingMode) {
4873     case float_round_nearest_even:
4874     case float_round_ties_away:
4875         break;
4876     case float_round_to_zero:
4877         roundIncrement = 0;
4878         break;
4879     case float_round_up:
4880         roundIncrement = zSign ? 0 : roundMask;
4881         break;
4882     case float_round_down:
4883         roundIncrement = zSign ? roundMask : 0;
4884         break;
4885     default:
4886         abort();
4887     }
4888     roundBits = zSig0 & roundMask;
4889     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4890         if (    ( 0x7FFE < zExp )
4891              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4892            ) {
4893             goto overflow;
4894         }
4895         if ( zExp <= 0 ) {
4896             if (status->flush_to_zero) {
4897                 float_raise(float_flag_output_denormal, status);
4898                 return packFloatx80(zSign, 0, 0);
4899             }
4900             isTiny = status->tininess_before_rounding
4901                   || (zExp < 0 )
4902                   || (zSig0 <= zSig0 + roundIncrement);
4903             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4904             zExp = 0;
4905             roundBits = zSig0 & roundMask;
4906             if (isTiny && roundBits) {
4907                 float_raise(float_flag_underflow, status);
4908             }
4909             if (roundBits) {
4910                 float_raise(float_flag_inexact, status);
4911             }
4912             zSig0 += roundIncrement;
4913             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4914             roundIncrement = roundMask + 1;
4915             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4916                 roundMask |= roundIncrement;
4917             }
4918             zSig0 &= ~ roundMask;
4919             return packFloatx80( zSign, zExp, zSig0 );
4920         }
4921     }
4922     if (roundBits) {
4923         float_raise(float_flag_inexact, status);
4924     }
4925     zSig0 += roundIncrement;
4926     if ( zSig0 < roundIncrement ) {
4927         ++zExp;
4928         zSig0 = UINT64_C(0x8000000000000000);
4929     }
4930     roundIncrement = roundMask + 1;
4931     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4932         roundMask |= roundIncrement;
4933     }
4934     zSig0 &= ~ roundMask;
4935     if ( zSig0 == 0 ) zExp = 0;
4936     return packFloatx80( zSign, zExp, zSig0 );
4937  precision80:
4938     switch (roundingMode) {
4939     case float_round_nearest_even:
4940     case float_round_ties_away:
4941         increment = ((int64_t)zSig1 < 0);
4942         break;
4943     case float_round_to_zero:
4944         increment = 0;
4945         break;
4946     case float_round_up:
4947         increment = !zSign && zSig1;
4948         break;
4949     case float_round_down:
4950         increment = zSign && zSig1;
4951         break;
4952     default:
4953         abort();
4954     }
4955     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4956         if (    ( 0x7FFE < zExp )
4957              || (    ( zExp == 0x7FFE )
4958                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4959                   && increment
4960                 )
4961            ) {
4962             roundMask = 0;
4963  overflow:
4964             float_raise(float_flag_overflow | float_flag_inexact, status);
4965             if (    ( roundingMode == float_round_to_zero )
4966                  || ( zSign && ( roundingMode == float_round_up ) )
4967                  || ( ! zSign && ( roundingMode == float_round_down ) )
4968                ) {
4969                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4970             }
4971             return packFloatx80(zSign,
4972                                 floatx80_infinity_high,
4973                                 floatx80_infinity_low);
4974         }
4975         if ( zExp <= 0 ) {
4976             isTiny = status->tininess_before_rounding
4977                   || (zExp < 0)
4978                   || !increment
4979                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4980             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4981             zExp = 0;
4982             if (isTiny && zSig1) {
4983                 float_raise(float_flag_underflow, status);
4984             }
4985             if (zSig1) {
4986                 float_raise(float_flag_inexact, status);
4987             }
4988             switch (roundingMode) {
4989             case float_round_nearest_even:
4990             case float_round_ties_away:
4991                 increment = ((int64_t)zSig1 < 0);
4992                 break;
4993             case float_round_to_zero:
4994                 increment = 0;
4995                 break;
4996             case float_round_up:
4997                 increment = !zSign && zSig1;
4998                 break;
4999             case float_round_down:
5000                 increment = zSign && zSig1;
5001                 break;
5002             default:
5003                 abort();
5004             }
5005             if ( increment ) {
5006                 ++zSig0;
5007                 if (!(zSig1 << 1) && roundNearestEven) {
5008                     zSig0 &= ~1;
5009                 }
5010                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5011             }
5012             return packFloatx80( zSign, zExp, zSig0 );
5013         }
5014     }
5015     if (zSig1) {
5016         float_raise(float_flag_inexact, status);
5017     }
5018     if ( increment ) {
5019         ++zSig0;
5020         if ( zSig0 == 0 ) {
5021             ++zExp;
5022             zSig0 = UINT64_C(0x8000000000000000);
5023         }
5024         else {
5025             if (!(zSig1 << 1) && roundNearestEven) {
5026                 zSig0 &= ~1;
5027             }
5028         }
5029     }
5030     else {
5031         if ( zSig0 == 0 ) zExp = 0;
5032     }
5033     return packFloatx80( zSign, zExp, zSig0 );
5034
5035 }
5036
5037 /*----------------------------------------------------------------------------
5038 | Takes an abstract floating-point value having sign `zSign', exponent
5039 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5040 | and returns the proper extended double-precision floating-point value
5041 | corresponding to the abstract input.  This routine is just like
5042 | `roundAndPackFloatx80' except that the input significand does not have to be
5043 | normalized.
5044 *----------------------------------------------------------------------------*/
5045
5046 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5047                                        bool zSign, int32_t zExp,
5048                                        uint64_t zSig0, uint64_t zSig1,
5049                                        float_status *status)
5050 {
5051     int8_t shiftCount;
5052
5053     if ( zSig0 == 0 ) {
5054         zSig0 = zSig1;
5055         zSig1 = 0;
5056         zExp -= 64;
5057     }
5058     shiftCount = clz64(zSig0);
5059     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5060     zExp -= shiftCount;
5061     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5062                                 zSig0, zSig1, status);
5063
5064 }
5065
5066 /*----------------------------------------------------------------------------
5067 | Returns the binary exponential of the single-precision floating-point value
5068 | `a'. The operation is performed according to the IEC/IEEE Standard for
5069 | Binary Floating-Point Arithmetic.
5070 |
5071 | Uses the following identities:
5072 |
5073 | 1. -------------------------------------------------------------------------
5074 |      x    x*ln(2)
5075 |     2  = e
5076 |
5077 | 2. -------------------------------------------------------------------------
5078 |                      2     3     4     5           n
5079 |      x        x     x     x     x     x           x
5080 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5081 |               1!    2!    3!    4!    5!          n!
5082 *----------------------------------------------------------------------------*/
5083
5084 static const float64 float32_exp2_coefficients[15] =
5085 {
5086     const_float64( 0x3ff0000000000000ll ), /*  1 */
5087     const_float64( 0x3fe0000000000000ll ), /*  2 */
5088     const_float64( 0x3fc5555555555555ll ), /*  3 */
5089     const_float64( 0x3fa5555555555555ll ), /*  4 */
5090     const_float64( 0x3f81111111111111ll ), /*  5 */
5091     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5092     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5093     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5094     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5095     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5096     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5097     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5098     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5099     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5100     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5101 };
5102
5103 float32 float32_exp2(float32 a, float_status *status)
5104 {
5105     FloatParts64 xp, xnp, tp, rp;
5106     int i;
5107
5108     float32_unpack_canonical(&xp, a, status);
5109     if (unlikely(xp.cls != float_class_normal)) {
5110         switch (xp.cls) {
5111         case float_class_snan:
5112         case float_class_qnan:
5113             parts_return_nan(&xp, status);
5114             return float32_round_pack_canonical(&xp, status);
5115         case float_class_inf:
5116             return xp.sign ? float32_zero : a;
5117         case float_class_zero:
5118             return float32_one;
5119         default:
5120             break;
5121         }
5122         g_assert_not_reached();
5123     }
5124
5125     float_raise(float_flag_inexact, status);
5126
5127     float64_unpack_canonical(&tp, float64_ln2, status);
5128     xp = *parts_mul(&xp, &tp, status);
5129     xnp = xp;
5130
5131     float64_unpack_canonical(&rp, float64_one, status);
5132     for (i = 0 ; i < 15 ; i++) {
5133         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5134         rp = *parts_muladd(&tp, &xp, &rp, 0, status);
5135         xnp = *parts_mul(&xnp, &xp, status);
5136     }
5137
5138     return float32_round_pack_canonical(&rp, status);
5139 }
5140
5141 /*----------------------------------------------------------------------------
5142 | Rounds the extended double-precision floating-point value `a'
5143 | to the precision provided by floatx80_rounding_precision and returns the
5144 | result as an extended double-precision floating-point value.
5145 | The operation is performed according to the IEC/IEEE Standard for Binary
5146 | Floating-Point Arithmetic.
5147 *----------------------------------------------------------------------------*/
5148
5149 floatx80 floatx80_round(floatx80 a, float_status *status)
5150 {
5151     FloatParts128 p;
5152
5153     if (!floatx80_unpack_canonical(&p, a, status)) {
5154         return floatx80_default_nan(status);
5155     }
5156     return floatx80_round_pack_canonical(&p, status);
5157 }
5158
5159 static void __attribute__((constructor)) softfloat_init(void)
5160 {
5161     union_float64 ua, ub, uc, ur;
5162
5163     if (QEMU_NO_HARDFLOAT) {
5164         return;
5165     }
5166     /*
5167      * Test that the host's FMA is not obviously broken. For example,
5168      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5169      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5170      */
5171     ua.s = 0x0020000000000001ULL;
5172     ub.s = 0x3ca0000000000000ULL;
5173     uc.s = 0x0020000000000000ULL;
5174     ur.h = fma(ua.h, ub.h, uc.h);
5175     if (ur.s != 0x0020000000000001ULL) {
5176         force_soft_fma = true;
5177     }
5178 }