fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*
 405  * Classify a floating point number. Everything above float_class_qnan
 406  * is a NaN so cls >= float_class_qnan is any NaN.
 407  */
 408
 409 typedef enum __attribute__ ((__packed__)) {
 410     float_class_unclassified,
 411     float_class_zero,
 412     float_class_normal,
 413     float_class_inf,
 414     float_class_qnan,  /* all NaNs from here */
 415     float_class_snan,
 416 } FloatClass;
 417
 418 #define float_cmask(bit)  (1u << (bit))
 419
 420 enum {
 421     float_cmask_zero    = float_cmask(float_class_zero),
 422     float_cmask_normal  = float_cmask(float_class_normal),
 423     float_cmask_inf     = float_cmask(float_class_inf),
 424     float_cmask_qnan    = float_cmask(float_class_qnan),
 425     float_cmask_snan    = float_cmask(float_class_snan),
 426
 427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 429 };
 430
 431 /* Flags for parts_minmax. */
 432 enum {
 433     /* Set for minimum; clear for maximum. */
 434     minmax_ismin = 1,
 435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
 436     minmax_isnum = 2,
 437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
 438     minmax_ismag = 4,
 439     /*
 440      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
 441      * operations.
 442      */
 443     minmax_isnumber = 8,
 444 };
 445
 446 /* Simple helpers for checking if, or what kind of, NaN we have */
 447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 448 {
 449     return unlikely(c >= float_class_qnan);
 450 }
 451
 452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 453 {
 454     return c == float_class_snan;
 455 }
 456
 457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 458 {
 459     return c == float_class_qnan;
 460 }
 461
 462 /*
 463  * Structure holding all of the decomposed parts of a float.
 464  * The exponent is unbiased and the fraction is normalized.
 465  *
 466  * The fraction words are stored in big-endian word ordering,
 467  * so that truncation from a larger format to a smaller format
 468  * can be done simply by ignoring subsequent elements.
 469  */
 470
 471 typedef struct {
 472     FloatClass cls;
 473     bool sign;
 474     int32_t exp;
 475     union {
 476         /* Routines that know the structure may reference the singular name. */
 477         uint64_t frac;
 478         /*
 479          * Routines expanded with multiple structures reference "hi" and "lo"
 480          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 481          * both the same word and aliased here.
 482          */
 483         uint64_t frac_hi;
 484         uint64_t frac_lo;
 485     };
 486 } FloatParts64;
 487
 488 typedef struct {
 489     FloatClass cls;
 490     bool sign;
 491     int32_t exp;
 492     uint64_t frac_hi;
 493     uint64_t frac_lo;
 494 } FloatParts128;
 495
 496 typedef struct {
 497     FloatClass cls;
 498     bool sign;
 499     int32_t exp;
 500     uint64_t frac_hi;
 501     uint64_t frac_hm;  /* high-middle */
 502     uint64_t frac_lm;  /* low-middle */
 503     uint64_t frac_lo;
 504 } FloatParts256;
 505
 506 /* These apply to the most significant word of each FloatPartsN. */
 507 #define DECOMPOSED_BINARY_POINT    63
 508 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 509
 510 /* Structure holding all of the relevant parameters for a format.
 511  *   exp_size: the size of the exponent field
 512  *   exp_bias: the offset applied to the exponent field
 513  *   exp_max: the maximum normalised exponent
 514  *   frac_size: the size of the fraction field
 515  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 516  * The following are computed based the size of fraction
 517  *   round_mask: bits below lsb which must be rounded
 518  * The following optional modifiers are available:
 519  *   arm_althp: handle ARM Alternative Half Precision
 520  */
 521 typedef struct {
 522     int exp_size;
 523     int exp_bias;
 524     int exp_re_bias;
 525     int exp_max;
 526     int frac_size;
 527     int frac_shift;
 528     bool arm_althp;
 529     uint64_t round_mask;
 530 } FloatFmt;
 531
 532 /* Expand fields based on the size of exponent and fraction */
 533 #define FLOAT_PARAMS_(E)                                \
 534     .exp_size       = E,                                \
 535     .exp_bias       = ((1 << E) - 1) >> 1,              \
 536     .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
 537     .exp_max        = (1 << E) - 1
 538
 539 #define FLOAT_PARAMS(E, F)                              \
 540     FLOAT_PARAMS_(E),                                   \
 541     .frac_size      = F,                                \
 542     .frac_shift     = (-F - 1) & 63,                    \
 543     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
 544
 545 static const FloatFmt float16_params = {
 546     FLOAT_PARAMS(5, 10)
 547 };
 548
 549 static const FloatFmt float16_params_ahp = {
 550     FLOAT_PARAMS(5, 10),
 551     .arm_althp = true
 552 };
 553
 554 static const FloatFmt bfloat16_params = {
 555     FLOAT_PARAMS(8, 7)
 556 };
 557
 558 static const FloatFmt float32_params = {
 559     FLOAT_PARAMS(8, 23)
 560 };
 561
 562 static const FloatFmt float64_params = {
 563     FLOAT_PARAMS(11, 52)
 564 };
 565
 566 static const FloatFmt float128_params = {
 567     FLOAT_PARAMS(15, 112)
 568 };
 569
 570 #define FLOATX80_PARAMS(R)              \
 571     FLOAT_PARAMS_(15),                  \
 572     .frac_size = R == 64 ? 63 : R,      \
 573     .frac_shift = 0,                    \
 574     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 575
 576 static const FloatFmt floatx80_params[3] = {
 577     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
 578     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
 579     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
 580 };
 581
 582 /* Unpack a float to parts, but do not canonicalize.  */
 583 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 584 {
 585     const int f_size = fmt->frac_size;
 586     const int e_size = fmt->exp_size;
 587
 588     *r = (FloatParts64) {
 589         .cls = float_class_unclassified,
 590         .sign = extract64(raw, f_size + e_size, 1),
 591         .exp = extract64(raw, f_size, e_size),
 592         .frac = extract64(raw, 0, f_size)
 593     };
 594 }
 595
 596 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 597 {
 598     unpack_raw64(p, &float16_params, f);
 599 }
 600
 601 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 602 {
 603     unpack_raw64(p, &bfloat16_params, f);
 604 }
 605
 606 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 607 {
 608     unpack_raw64(p, &float32_params, f);
 609 }
 610
 611 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 612 {
 613     unpack_raw64(p, &float64_params, f);
 614 }
 615
 616 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 617 {
 618     *p = (FloatParts128) {
 619         .cls = float_class_unclassified,
 620         .sign = extract32(f.high, 15, 1),
 621         .exp = extract32(f.high, 0, 15),
 622         .frac_hi = f.low
 623     };
 624 }
 625
 626 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 627 {
 628     const int f_size = float128_params.frac_size - 64;
 629     const int e_size = float128_params.exp_size;
 630
 631     *p = (FloatParts128) {
 632         .cls = float_class_unclassified,
 633         .sign = extract64(f.high, f_size + e_size, 1),
 634         .exp = extract64(f.high, f_size, e_size),
 635         .frac_hi = extract64(f.high, 0, f_size),
 636         .frac_lo = f.low,
 637     };
 638 }
 639
 640 /* Pack a float from parts, but do not canonicalize.  */
 641 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 642 {
 643     const int f_size = fmt->frac_size;
 644     const int e_size = fmt->exp_size;
 645     uint64_t ret;
 646
 647     ret = (uint64_t)p->sign << (f_size + e_size);
 648     ret = deposit64(ret, f_size, e_size, p->exp);
 649     ret = deposit64(ret, 0, f_size, p->frac);
 650     return ret;
 651 }
 652
 653 static inline float16 float16_pack_raw(const FloatParts64 *p)
 654 {
 655     return make_float16(pack_raw64(p, &float16_params));
 656 }
 657
 658 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 659 {
 660     return pack_raw64(p, &bfloat16_params);
 661 }
 662
 663 static inline float32 float32_pack_raw(const FloatParts64 *p)
 664 {
 665     return make_float32(pack_raw64(p, &float32_params));
 666 }
 667
 668 static inline float64 float64_pack_raw(const FloatParts64 *p)
 669 {
 670     return make_float64(pack_raw64(p, &float64_params));
 671 }
 672
 673 static float128 float128_pack_raw(const FloatParts128 *p)
 674 {
 675     const int f_size = float128_params.frac_size - 64;
 676     const int e_size = float128_params.exp_size;
 677     uint64_t hi;
 678
 679     hi = (uint64_t)p->sign << (f_size + e_size);
 680     hi = deposit64(hi, f_size, e_size, p->exp);
 681     hi = deposit64(hi, 0, f_size, p->frac_hi);
 682     return make_float128(hi, p->frac_lo);
 683 }
 684
 685 /*----------------------------------------------------------------------------
 686 | Functions and definitions to determine:  (1) whether tininess for underflow
 687 | is detected before or after rounding by default, (2) what (if anything)
 688 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 689 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 690 | are propagated from function inputs to output.  These details are target-
 691 | specific.
 692 *----------------------------------------------------------------------------*/
 693 #include "softfloat-specialize.c.inc"
 694
 695 #define PARTS_GENERIC_64_128(NAME, P) \
 696     _Generic((P), FloatParts64 *: parts64_##NAME, \
 697                   FloatParts128 *: parts128_##NAME)
 698
 699 #define PARTS_GENERIC_64_128_256(NAME, P) \
 700     _Generic((P), FloatParts64 *: parts64_##NAME, \
 701                   FloatParts128 *: parts128_##NAME, \
 702                   FloatParts256 *: parts256_##NAME)
 703
 704 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 705 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 706
 707 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 708 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 709
 710 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 711
 712 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 713                                       float_status *s);
 714 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 715                                         float_status *s);
 716
 717 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 718
 719 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 720                                              FloatParts64 *c, float_status *s,
 721                                              int ab_mask, int abc_mask);
 722 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 723                                                FloatParts128 *b,
 724                                                FloatParts128 *c,
 725                                                float_status *s,
 726                                                int ab_mask, int abc_mask);
 727
 728 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 729     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 730
 731 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 732                                  const FloatFmt *fmt);
 733 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 734                                   const FloatFmt *fmt);
 735
 736 #define parts_canonicalize(A, S, F) \
 737     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 738
 739 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
 740                                    const FloatFmt *fmt);
 741 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
 742                                     const FloatFmt *fmt);
 743
 744 #define parts_uncanon_normal(A, S, F) \
 745     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
 746
 747 static void parts64_uncanon(FloatParts64 *p, float_status *status,
 748                             const FloatFmt *fmt);
 749 static void parts128_uncanon(FloatParts128 *p, float_status *status,
 750                              const FloatFmt *fmt);
 751
 752 #define parts_uncanon(A, S, F) \
 753     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 754
 755 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 756 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 757 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 758
 759 #define parts_add_normal(A, B) \
 760     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 761
 762 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 763 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 764 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 765
 766 #define parts_sub_normal(A, B) \
 767     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 768
 769 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 770                                     float_status *s, bool subtract);
 771 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 772                                       float_status *s, bool subtract);
 773
 774 #define parts_addsub(A, B, S, Z) \
 775     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 776
 777 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 778                                  float_status *s);
 779 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 780                                    float_status *s);
 781
 782 #define parts_mul(A, B, S) \
 783     PARTS_GENERIC_64_128(mul, A)(A, B, S)
 784
 785 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 786                                     FloatParts64 *c, int flags,
 787                                     float_status *s);
 788 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 789                                       FloatParts128 *c, int flags,
 790                                       float_status *s);
 791
 792 #define parts_muladd(A, B, C, Z, S) \
 793     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 794
 795 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 796                                  float_status *s);
 797 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 798                                    float_status *s);
 799
 800 #define parts_div(A, B, S) \
 801     PARTS_GENERIC_64_128(div, A)(A, B, S)
 802
 803 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
 804                                     uint64_t *mod_quot, float_status *s);
 805 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
 806                                       uint64_t *mod_quot, float_status *s);
 807
 808 #define parts_modrem(A, B, Q, S) \
 809     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
 810
 811 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
 812 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
 813
 814 #define parts_sqrt(A, S, F) \
 815     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 816
 817 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 818                                         int scale, int frac_size);
 819 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 820                                          int scale, int frac_size);
 821
 822 #define parts_round_to_int_normal(A, R, C, F) \
 823     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 824
 825 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 826                                  int scale, float_status *s,
 827                                  const FloatFmt *fmt);
 828 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 829                                   int scale, float_status *s,
 830                                   const FloatFmt *fmt);
 831
 832 #define parts_round_to_int(A, R, C, S, F) \
 833     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 834
 835 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 836                                      int scale, int64_t min, int64_t max,
 837                                      float_status *s);
 838 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 839                                      int scale, int64_t min, int64_t max,
 840                                      float_status *s);
 841
 842 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 843     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 844
 845 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
 846                                       int scale, uint64_t max,
 847                                       float_status *s);
 848 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
 849                                        int scale, uint64_t max,
 850                                        float_status *s);
 851
 852 #define parts_float_to_uint(P, R, Z, M, S) \
 853     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 854
 855 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 856                                   int scale, float_status *s);
 857 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 858                                    int scale, float_status *s);
 859
 860 #define parts_sint_to_float(P, I, Z, S) \
 861     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 862
 863 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
 864                                   int scale, float_status *s);
 865 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
 866                                    int scale, float_status *s);
 867
 868 #define parts_uint_to_float(P, I, Z, S) \
 869     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 870
 871 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
 872                                     float_status *s, int flags);
 873 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
 874                                       float_status *s, int flags);
 875
 876 #define parts_minmax(A, B, S, F) \
 877     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 878
 879 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
 880                                      float_status *s, bool q);
 881 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
 882                                       float_status *s, bool q);
 883
 884 #define parts_compare(A, B, S, Q) \
 885     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 886
 887 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
 888 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
 889
 890 #define parts_scalbn(A, N, S) \
 891     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 892
 893 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
 894 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
 895
 896 #define parts_log2(A, S, F) \
 897     PARTS_GENERIC_64_128(log2, A)(A, S, F)
 898
 899 /*
 900  * Helper functions for softfloat-parts.c.inc, per-size operations.
 901  */
 902
 903 #define FRAC_GENERIC_64_128(NAME, P) \
 904     _Generic((P), FloatParts64 *: frac64_##NAME, \
 905                   FloatParts128 *: frac128_##NAME)
 906
 907 #define FRAC_GENERIC_64_128_256(NAME, P) \
 908     _Generic((P), FloatParts64 *: frac64_##NAME, \
 909                   FloatParts128 *: frac128_##NAME, \
 910                   FloatParts256 *: frac256_##NAME)
 911
 912 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 913 {
 914     return uadd64_overflow(a->frac, b->frac, &r->frac);
 915 }
 916
 917 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 918 {
 919     bool c = 0;
 920     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 921     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 922     return c;
 923 }
 924
 925 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 926 {
 927     bool c = 0;
 928     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 929     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 930     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 931     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 932     return c;
 933 }
 934
 935 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 936
 937 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 938 {
 939     return uadd64_overflow(a->frac, c, &r->frac);
 940 }
 941
 942 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 943 {
 944     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 945     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 946 }
 947
 948 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 949
 950 static void frac64_allones(FloatParts64 *a)
 951 {
 952     a->frac = -1;
 953 }
 954
 955 static void frac128_allones(FloatParts128 *a)
 956 {
 957     a->frac_hi = a->frac_lo = -1;
 958 }
 959
 960 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 961
 962 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 963 {
 964     return (a->frac == b->frac ? float_relation_equal
 965             : a->frac < b->frac ? float_relation_less
 966             : float_relation_greater);
 967 }
 968
 969 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 970 {
 971     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 972     if (ta == tb) {
 973         ta = a->frac_lo, tb = b->frac_lo;
 974         if (ta == tb) {
 975             return float_relation_equal;
 976         }
 977     }
 978     return ta < tb ? float_relation_less : float_relation_greater;
 979 }
 980
 981 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 982
 983 static void frac64_clear(FloatParts64 *a)
 984 {
 985     a->frac = 0;
 986 }
 987
 988 static void frac128_clear(FloatParts128 *a)
 989 {
 990     a->frac_hi = a->frac_lo = 0;
 991 }
 992
 993 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 994
 995 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 996 {
 997     uint64_t n1, n0, r, q;
 998     bool ret;
 999
1000     /*
1001      * We want a 2*N / N-bit division to produce exactly an N-bit
1002      * result, so that we do not lose any precision and so that we
1003      * do not have to renormalize afterward.  If A.frac < B.frac,
1004      * then division would produce an (N-1)-bit result; shift A left
1005      * by one to produce the an N-bit result, and return true to
1006      * decrement the exponent to match.
1007      *
1008      * The udiv_qrnnd algorithm that we're using requires normalization,
1009      * i.e. the msb of the denominator must be set, which is already true.
1010      */
1011     ret = a->frac < b->frac;
1012     if (ret) {
1013         n0 = a->frac;
1014         n1 = 0;
1015     } else {
1016         n0 = a->frac >> 1;
1017         n1 = a->frac << 63;
1018     }
1019     q = udiv_qrnnd(&r, n0, n1, b->frac);
1020
1021     /* Set lsb if there is a remainder, to set inexact. */
1022     a->frac = q | (r != 0);
1023
1024     return ret;
1025 }
1026
1027 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1028 {
1029     uint64_t q0, q1, a0, a1, b0, b1;
1030     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1031     bool ret = false;
1032
1033     a0 = a->frac_hi, a1 = a->frac_lo;
1034     b0 = b->frac_hi, b1 = b->frac_lo;
1035
1036     ret = lt128(a0, a1, b0, b1);
1037     if (!ret) {
1038         a1 = shr_double(a0, a1, 1);
1039         a0 = a0 >> 1;
1040     }
1041
1042     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1043     q0 = estimateDiv128To64(a0, a1, b0);
1044
1045     /*
1046      * Estimate is high because B1 was not included (unless B1 == 0).
1047      * Reduce quotient and increase remainder until remainder is non-negative.
1048      * This loop will execute 0 to 2 times.
1049      */
1050     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1051     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1052     while (r0 != 0) {
1053         q0--;
1054         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1055     }
1056
1057     /* Repeat using the remainder, producing a second word of quotient. */
1058     q1 = estimateDiv128To64(r1, r2, b0);
1059     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1060     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1061     while (r1 != 0) {
1062         q1--;
1063         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1064     }
1065
1066     /* Any remainder indicates inexact; set sticky bit. */
1067     q1 |= (r2 | r3) != 0;
1068
1069     a->frac_hi = q0;
1070     a->frac_lo = q1;
1071     return ret;
1072 }
1073
1074 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1075
1076 static bool frac64_eqz(FloatParts64 *a)
1077 {
1078     return a->frac == 0;
1079 }
1080
1081 static bool frac128_eqz(FloatParts128 *a)
1082 {
1083     return (a->frac_hi | a->frac_lo) == 0;
1084 }
1085
1086 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1087
1088 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1089 {
1090     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1091 }
1092
1093 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1094 {
1095     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1096                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1097 }
1098
1099 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1100
1101 static void frac64_neg(FloatParts64 *a)
1102 {
1103     a->frac = -a->frac;
1104 }
1105
1106 static void frac128_neg(FloatParts128 *a)
1107 {
1108     bool c = 0;
1109     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1110     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1111 }
1112
1113 static void frac256_neg(FloatParts256 *a)
1114 {
1115     bool c = 0;
1116     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1117     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1118     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1119     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1120 }
1121
1122 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1123
1124 static int frac64_normalize(FloatParts64 *a)
1125 {
1126     if (a->frac) {
1127         int shift = clz64(a->frac);
1128         a->frac <<= shift;
1129         return shift;
1130     }
1131     return 64;
1132 }
1133
1134 static int frac128_normalize(FloatParts128 *a)
1135 {
1136     if (a->frac_hi) {
1137         int shl = clz64(a->frac_hi);
1138         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1139         a->frac_lo <<= shl;
1140         return shl;
1141     } else if (a->frac_lo) {
1142         int shl = clz64(a->frac_lo);
1143         a->frac_hi = a->frac_lo << shl;
1144         a->frac_lo = 0;
1145         return shl + 64;
1146     }
1147     return 128;
1148 }
1149
1150 static int frac256_normalize(FloatParts256 *a)
1151 {
1152     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1153     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1154     int ret, shl;
1155
1156     if (likely(a0)) {
1157         shl = clz64(a0);
1158         if (shl == 0) {
1159             return 0;
1160         }
1161         ret = shl;
1162     } else {
1163         if (a1) {
1164             ret = 64;
1165             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1166         } else if (a2) {
1167             ret = 128;
1168             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1169         } else if (a3) {
1170             ret = 192;
1171             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1172         } else {
1173             ret = 256;
1174             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1175             goto done;
1176         }
1177         shl = clz64(a0);
1178         if (shl == 0) {
1179             goto done;
1180         }
1181         ret += shl;
1182     }
1183
1184     a0 = shl_double(a0, a1, shl);
1185     a1 = shl_double(a1, a2, shl);
1186     a2 = shl_double(a2, a3, shl);
1187     a3 <<= shl;
1188
1189  done:
1190     a->frac_hi = a0;
1191     a->frac_hm = a1;
1192     a->frac_lm = a2;
1193     a->frac_lo = a3;
1194     return ret;
1195 }
1196
1197 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1198
1199 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1200 {
1201     uint64_t a0, a1, b0, t0, t1, q, quot;
1202     int exp_diff = a->exp - b->exp;
1203     int shift;
1204
1205     a0 = a->frac;
1206     a1 = 0;
1207
1208     if (exp_diff < -1) {
1209         if (mod_quot) {
1210             *mod_quot = 0;
1211         }
1212         return;
1213     }
1214     if (exp_diff == -1) {
1215         a0 >>= 1;
1216         exp_diff = 0;
1217     }
1218
1219     b0 = b->frac;
1220     quot = q = b0 <= a0;
1221     if (q) {
1222         a0 -= b0;
1223     }
1224
1225     exp_diff -= 64;
1226     while (exp_diff > 0) {
1227         q = estimateDiv128To64(a0, a1, b0);
1228         q = q > 2 ? q - 2 : 0;
1229         mul64To128(b0, q, &t0, &t1);
1230         sub128(a0, a1, t0, t1, &a0, &a1);
1231         shortShift128Left(a0, a1, 62, &a0, &a1);
1232         exp_diff -= 62;
1233         quot = (quot << 62) + q;
1234     }
1235
1236     exp_diff += 64;
1237     if (exp_diff > 0) {
1238         q = estimateDiv128To64(a0, a1, b0);
1239         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1240         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1241         sub128(a0, a1, t0, t1, &a0, &a1);
1242         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1243         while (le128(t0, t1, a0, a1)) {
1244             ++q;
1245             sub128(a0, a1, t0, t1, &a0, &a1);
1246         }
1247         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1248     } else {
1249         t0 = b0;
1250         t1 = 0;
1251     }
1252
1253     if (mod_quot) {
1254         *mod_quot = quot;
1255     } else {
1256         sub128(t0, t1, a0, a1, &t0, &t1);
1257         if (lt128(t0, t1, a0, a1) ||
1258             (eq128(t0, t1, a0, a1) && (q & 1))) {
1259             a0 = t0;
1260             a1 = t1;
1261             a->sign = !a->sign;
1262         }
1263     }
1264
1265     if (likely(a0)) {
1266         shift = clz64(a0);
1267         shortShift128Left(a0, a1, shift, &a0, &a1);
1268     } else if (likely(a1)) {
1269         shift = clz64(a1);
1270         a0 = a1 << shift;
1271         a1 = 0;
1272         shift += 64;
1273     } else {
1274         a->cls = float_class_zero;
1275         return;
1276     }
1277
1278     a->exp = b->exp + exp_diff - shift;
1279     a->frac = a0 | (a1 != 0);
1280 }
1281
1282 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1283                            uint64_t *mod_quot)
1284 {
1285     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1286     int exp_diff = a->exp - b->exp;
1287     int shift;
1288
1289     a0 = a->frac_hi;
1290     a1 = a->frac_lo;
1291     a2 = 0;
1292
1293     if (exp_diff < -1) {
1294         if (mod_quot) {
1295             *mod_quot = 0;
1296         }
1297         return;
1298     }
1299     if (exp_diff == -1) {
1300         shift128Right(a0, a1, 1, &a0, &a1);
1301         exp_diff = 0;
1302     }
1303
1304     b0 = b->frac_hi;
1305     b1 = b->frac_lo;
1306
1307     quot = q = le128(b0, b1, a0, a1);
1308     if (q) {
1309         sub128(a0, a1, b0, b1, &a0, &a1);
1310     }
1311
1312     exp_diff -= 64;
1313     while (exp_diff > 0) {
1314         q = estimateDiv128To64(a0, a1, b0);
1315         q = q > 4 ? q - 4 : 0;
1316         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1317         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1318         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1319         exp_diff -= 61;
1320         quot = (quot << 61) + q;
1321     }
1322
1323     exp_diff += 64;
1324     if (exp_diff > 0) {
1325         q = estimateDiv128To64(a0, a1, b0);
1326         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1327         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1328         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1329         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1330         while (le192(t0, t1, t2, a0, a1, a2)) {
1331             ++q;
1332             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1333         }
1334         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1335     } else {
1336         t0 = b0;
1337         t1 = b1;
1338         t2 = 0;
1339     }
1340
1341     if (mod_quot) {
1342         *mod_quot = quot;
1343     } else {
1344         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1345         if (lt192(t0, t1, t2, a0, a1, a2) ||
1346             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1347             a0 = t0;
1348             a1 = t1;
1349             a2 = t2;
1350             a->sign = !a->sign;
1351         }
1352     }
1353
1354     if (likely(a0)) {
1355         shift = clz64(a0);
1356         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1357     } else if (likely(a1)) {
1358         shift = clz64(a1);
1359         shortShift128Left(a1, a2, shift, &a0, &a1);
1360         a2 = 0;
1361         shift += 64;
1362     } else if (likely(a2)) {
1363         shift = clz64(a2);
1364         a0 = a2 << shift;
1365         a1 = a2 = 0;
1366         shift += 128;
1367     } else {
1368         a->cls = float_class_zero;
1369         return;
1370     }
1371
1372     a->exp = b->exp + exp_diff - shift;
1373     a->frac_hi = a0;
1374     a->frac_lo = a1 | (a2 != 0);
1375 }
1376
1377 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1378
1379 static void frac64_shl(FloatParts64 *a, int c)
1380 {
1381     a->frac <<= c;
1382 }
1383
1384 static void frac128_shl(FloatParts128 *a, int c)
1385 {
1386     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1387
1388     if (c & 64) {
1389         a0 = a1, a1 = 0;
1390     }
1391
1392     c &= 63;
1393     if (c) {
1394         a0 = shl_double(a0, a1, c);
1395         a1 = a1 << c;
1396     }
1397
1398     a->frac_hi = a0;
1399     a->frac_lo = a1;
1400 }
1401
1402 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1403
1404 static void frac64_shr(FloatParts64 *a, int c)
1405 {
1406     a->frac >>= c;
1407 }
1408
1409 static void frac128_shr(FloatParts128 *a, int c)
1410 {
1411     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1412
1413     if (c & 64) {
1414         a1 = a0, a0 = 0;
1415     }
1416
1417     c &= 63;
1418     if (c) {
1419         a1 = shr_double(a0, a1, c);
1420         a0 = a0 >> c;
1421     }
1422
1423     a->frac_hi = a0;
1424     a->frac_lo = a1;
1425 }
1426
1427 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1428
1429 static void frac64_shrjam(FloatParts64 *a, int c)
1430 {
1431     uint64_t a0 = a->frac;
1432
1433     if (likely(c != 0)) {
1434         if (likely(c < 64)) {
1435             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1436         } else {
1437             a0 = a0 != 0;
1438         }
1439         a->frac = a0;
1440     }
1441 }
1442
1443 static void frac128_shrjam(FloatParts128 *a, int c)
1444 {
1445     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1446     uint64_t sticky = 0;
1447
1448     if (unlikely(c == 0)) {
1449         return;
1450     } else if (likely(c < 64)) {
1451         /* nothing */
1452     } else if (likely(c < 128)) {
1453         sticky = a1;
1454         a1 = a0;
1455         a0 = 0;
1456         c &= 63;
1457         if (c == 0) {
1458             goto done;
1459         }
1460     } else {
1461         sticky = a0 | a1;
1462         a0 = a1 = 0;
1463         goto done;
1464     }
1465
1466     sticky |= shr_double(a1, 0, c);
1467     a1 = shr_double(a0, a1, c);
1468     a0 = a0 >> c;
1469
1470  done:
1471     a->frac_lo = a1 | (sticky != 0);
1472     a->frac_hi = a0;
1473 }
1474
1475 static void frac256_shrjam(FloatParts256 *a, int c)
1476 {
1477     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1478     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1479     uint64_t sticky = 0;
1480
1481     if (unlikely(c == 0)) {
1482         return;
1483     } else if (likely(c < 64)) {
1484         /* nothing */
1485     } else if (likely(c < 256)) {
1486         if (unlikely(c & 128)) {
1487             sticky |= a2 | a3;
1488             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1489         }
1490         if (unlikely(c & 64)) {
1491             sticky |= a3;
1492             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1493         }
1494         c &= 63;
1495         if (c == 0) {
1496             goto done;
1497         }
1498     } else {
1499         sticky = a0 | a1 | a2 | a3;
1500         a0 = a1 = a2 = a3 = 0;
1501         goto done;
1502     }
1503
1504     sticky |= shr_double(a3, 0, c);
1505     a3 = shr_double(a2, a3, c);
1506     a2 = shr_double(a1, a2, c);
1507     a1 = shr_double(a0, a1, c);
1508     a0 = a0 >> c;
1509
1510  done:
1511     a->frac_lo = a3 | (sticky != 0);
1512     a->frac_lm = a2;
1513     a->frac_hm = a1;
1514     a->frac_hi = a0;
1515 }
1516
1517 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1518
1519 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1520 {
1521     return usub64_overflow(a->frac, b->frac, &r->frac);
1522 }
1523
1524 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1525 {
1526     bool c = 0;
1527     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1528     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1529     return c;
1530 }
1531
1532 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1533 {
1534     bool c = 0;
1535     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1536     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1537     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1538     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1539     return c;
1540 }
1541
1542 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1543
1544 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1545 {
1546     r->frac = a->frac_hi | (a->frac_lo != 0);
1547 }
1548
1549 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1550 {
1551     r->frac_hi = a->frac_hi;
1552     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1553 }
1554
1555 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1556
1557 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1558 {
1559     r->frac_hi = a->frac;
1560     r->frac_lo = 0;
1561 }
1562
1563 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1564 {
1565     r->frac_hi = a->frac_hi;
1566     r->frac_hm = a->frac_lo;
1567     r->frac_lm = 0;
1568     r->frac_lo = 0;
1569 }
1570
1571 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1572
1573 /*
1574  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1575  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1576  * and thus MIT licenced.
1577  */
1578 static const uint16_t rsqrt_tab[128] = {
1579     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1580     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1581     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1582     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1583     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1584     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1585     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1586     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1587     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1588     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1589     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1590     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1591     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1592     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1593     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1594     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1595 };
1596
1597 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1598 #define FloatPartsN    glue(FloatParts,N)
1599 #define FloatPartsW    glue(FloatParts,W)
1600
1601 #define N 64
1602 #define W 128
1603
1604 #include "softfloat-parts-addsub.c.inc"
1605 #include "softfloat-parts.c.inc"
1606
1607 #undef  N
1608 #undef  W
1609 #define N 128
1610 #define W 256
1611
1612 #include "softfloat-parts-addsub.c.inc"
1613 #include "softfloat-parts.c.inc"
1614
1615 #undef  N
1616 #undef  W
1617 #define N            256
1618
1619 #include "softfloat-parts-addsub.c.inc"
1620
1621 #undef  N
1622 #undef  W
1623 #undef  partsN
1624 #undef  FloatPartsN
1625 #undef  FloatPartsW
1626
1627 /*
1628  * Pack/unpack routines with a specific FloatFmt.
1629  */
1630
1631 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1632                                       float_status *s, const FloatFmt *params)
1633 {
1634     float16_unpack_raw(p, f);
1635     parts_canonicalize(p, s, params);
1636 }
1637
1638 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1639                                      float_status *s)
1640 {
1641     float16a_unpack_canonical(p, f, s, &float16_params);
1642 }
1643
1644 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1645                                       float_status *s)
1646 {
1647     bfloat16_unpack_raw(p, f);
1648     parts_canonicalize(p, s, &bfloat16_params);
1649 }
1650
1651 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1652                                              float_status *s,
1653                                              const FloatFmt *params)
1654 {
1655     parts_uncanon(p, s, params);
1656     return float16_pack_raw(p);
1657 }
1658
1659 static float16 float16_round_pack_canonical(FloatParts64 *p,
1660                                             float_status *s)
1661 {
1662     return float16a_round_pack_canonical(p, s, &float16_params);
1663 }
1664
1665 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1666                                               float_status *s)
1667 {
1668     parts_uncanon(p, s, &bfloat16_params);
1669     return bfloat16_pack_raw(p);
1670 }
1671
1672 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1673                                      float_status *s)
1674 {
1675     float32_unpack_raw(p, f);
1676     parts_canonicalize(p, s, &float32_params);
1677 }
1678
1679 static float32 float32_round_pack_canonical(FloatParts64 *p,
1680                                             float_status *s)
1681 {
1682     parts_uncanon(p, s, &float32_params);
1683     return float32_pack_raw(p);
1684 }
1685
1686 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1687                                      float_status *s)
1688 {
1689     float64_unpack_raw(p, f);
1690     parts_canonicalize(p, s, &float64_params);
1691 }
1692
1693 static float64 float64_round_pack_canonical(FloatParts64 *p,
1694                                             float_status *s)
1695 {
1696     parts_uncanon(p, s, &float64_params);
1697     return float64_pack_raw(p);
1698 }
1699
1700 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1701                                                float_status *s)
1702 {
1703     parts_uncanon(p, s, &float32_params);
1704
1705     /*
1706      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1707      * We need to adjust the fraction higher so that the least N bits are
1708      * zero, and the fraction is adjacent to the float64 implicit bit.
1709      */
1710     switch (p->cls) {
1711     case float_class_normal:
1712         if (unlikely(p->exp == 0)) {
1713             /*
1714              * The result is denormal for float32, but can be represented
1715              * in normalized form for float64.  Adjust, per canonicalize.
1716              */
1717             int shift = frac_normalize(p);
1718             p->exp = (float32_params.frac_shift -
1719                       float32_params.exp_bias - shift + 1 +
1720                       float64_params.exp_bias);
1721             frac_shr(p, float64_params.frac_shift);
1722         } else {
1723             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1724             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1725         }
1726         break;
1727     case float_class_snan:
1728     case float_class_qnan:
1729         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1730         p->exp = float64_params.exp_max;
1731         break;
1732     case float_class_inf:
1733         p->exp = float64_params.exp_max;
1734         break;
1735     case float_class_zero:
1736         break;
1737     default:
1738         g_assert_not_reached();
1739     }
1740
1741     return float64_pack_raw(p);
1742 }
1743
1744 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1745                                       float_status *s)
1746 {
1747     float128_unpack_raw(p, f);
1748     parts_canonicalize(p, s, &float128_params);
1749 }
1750
1751 static float128 float128_round_pack_canonical(FloatParts128 *p,
1752                                               float_status *s)
1753 {
1754     parts_uncanon(p, s, &float128_params);
1755     return float128_pack_raw(p);
1756 }
1757
1758 /* Returns false if the encoding is invalid. */
1759 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1760                                       float_status *s)
1761 {
1762     /* Ensure rounding precision is set before beginning. */
1763     switch (s->floatx80_rounding_precision) {
1764     case floatx80_precision_x:
1765     case floatx80_precision_d:
1766     case floatx80_precision_s:
1767         break;
1768     default:
1769         g_assert_not_reached();
1770     }
1771
1772     if (unlikely(floatx80_invalid_encoding(f))) {
1773         float_raise(float_flag_invalid, s);
1774         return false;
1775     }
1776
1777     floatx80_unpack_raw(p, f);
1778
1779     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1780         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1781     } else {
1782         /* The explicit integer bit is ignored, after invalid checks. */
1783         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1784         p->cls = (p->frac_hi == 0 ? float_class_inf
1785                   : parts_is_snan_frac(p->frac_hi, s)
1786                   ? float_class_snan : float_class_qnan);
1787     }
1788     return true;
1789 }
1790
1791 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1792                                               float_status *s)
1793 {
1794     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1795     uint64_t frac;
1796     int exp;
1797
1798     switch (p->cls) {
1799     case float_class_normal:
1800         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1801             parts_uncanon_normal(p, s, fmt);
1802             frac = p->frac_hi;
1803             exp = p->exp;
1804         } else {
1805             FloatParts64 p64;
1806
1807             p64.sign = p->sign;
1808             p64.exp = p->exp;
1809             frac_truncjam(&p64, p);
1810             parts_uncanon_normal(&p64, s, fmt);
1811             frac = p64.frac;
1812             exp = p64.exp;
1813         }
1814         if (exp != fmt->exp_max) {
1815             break;
1816         }
1817         /* rounded to inf -- fall through to set frac correctly */
1818
1819     case float_class_inf:
1820         /* x86 and m68k differ in the setting of the integer bit. */
1821         frac = floatx80_infinity_low;
1822         exp = fmt->exp_max;
1823         break;
1824
1825     case float_class_zero:
1826         frac = 0;
1827         exp = 0;
1828         break;
1829
1830     case float_class_snan:
1831     case float_class_qnan:
1832         /* NaNs have the integer bit set. */
1833         frac = p->frac_hi | (1ull << 63);
1834         exp = fmt->exp_max;
1835         break;
1836
1837     default:
1838         g_assert_not_reached();
1839     }
1840
1841     return packFloatx80(p->sign, exp, frac);
1842 }
1843
1844 /*
1845  * Addition and subtraction
1846  */
1847
1848 static float16 QEMU_FLATTEN
1849 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1850 {
1851     FloatParts64 pa, pb, *pr;
1852
1853     float16_unpack_canonical(&pa, a, status);
1854     float16_unpack_canonical(&pb, b, status);
1855     pr = parts_addsub(&pa, &pb, status, subtract);
1856
1857     return float16_round_pack_canonical(pr, status);
1858 }
1859
1860 float16 float16_add(float16 a, float16 b, float_status *status)
1861 {
1862     return float16_addsub(a, b, status, false);
1863 }
1864
1865 float16 float16_sub(float16 a, float16 b, float_status *status)
1866 {
1867     return float16_addsub(a, b, status, true);
1868 }
1869
1870 static float32 QEMU_SOFTFLOAT_ATTR
1871 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1872 {
1873     FloatParts64 pa, pb, *pr;
1874
1875     float32_unpack_canonical(&pa, a, status);
1876     float32_unpack_canonical(&pb, b, status);
1877     pr = parts_addsub(&pa, &pb, status, subtract);
1878
1879     return float32_round_pack_canonical(pr, status);
1880 }
1881
1882 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1883 {
1884     return soft_f32_addsub(a, b, status, false);
1885 }
1886
1887 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1888 {
1889     return soft_f32_addsub(a, b, status, true);
1890 }
1891
1892 static float64 QEMU_SOFTFLOAT_ATTR
1893 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1894 {
1895     FloatParts64 pa, pb, *pr;
1896
1897     float64_unpack_canonical(&pa, a, status);
1898     float64_unpack_canonical(&pb, b, status);
1899     pr = parts_addsub(&pa, &pb, status, subtract);
1900
1901     return float64_round_pack_canonical(pr, status);
1902 }
1903
1904 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1905 {
1906     return soft_f64_addsub(a, b, status, false);
1907 }
1908
1909 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1910 {
1911     return soft_f64_addsub(a, b, status, true);
1912 }
1913
1914 static float hard_f32_add(float a, float b)
1915 {
1916     return a + b;
1917 }
1918
1919 static float hard_f32_sub(float a, float b)
1920 {
1921     return a - b;
1922 }
1923
1924 static double hard_f64_add(double a, double b)
1925 {
1926     return a + b;
1927 }
1928
1929 static double hard_f64_sub(double a, double b)
1930 {
1931     return a - b;
1932 }
1933
1934 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1935 {
1936     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1937         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1938     }
1939     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1940 }
1941
1942 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1943 {
1944     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1945         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1946     } else {
1947         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1948     }
1949 }
1950
1951 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1952                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1953 {
1954     return float32_gen2(a, b, s, hard, soft,
1955                         f32_is_zon2, f32_addsubmul_post);
1956 }
1957
1958 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1959                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1960 {
1961     return float64_gen2(a, b, s, hard, soft,
1962                         f64_is_zon2, f64_addsubmul_post);
1963 }
1964
1965 float32 QEMU_FLATTEN
1966 float32_add(float32 a, float32 b, float_status *s)
1967 {
1968     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1969 }
1970
1971 float32 QEMU_FLATTEN
1972 float32_sub(float32 a, float32 b, float_status *s)
1973 {
1974     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1975 }
1976
1977 float64 QEMU_FLATTEN
1978 float64_add(float64 a, float64 b, float_status *s)
1979 {
1980     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1981 }
1982
1983 float64 QEMU_FLATTEN
1984 float64_sub(float64 a, float64 b, float_status *s)
1985 {
1986     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1987 }
1988
1989 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1990                                  bool subtract)
1991 {
1992     FloatParts64 pa, pb, *pr;
1993
1994     float64_unpack_canonical(&pa, a, status);
1995     float64_unpack_canonical(&pb, b, status);
1996     pr = parts_addsub(&pa, &pb, status, subtract);
1997
1998     return float64r32_round_pack_canonical(pr, status);
1999 }
2000
2001 float64 float64r32_add(float64 a, float64 b, float_status *status)
2002 {
2003     return float64r32_addsub(a, b, status, false);
2004 }
2005
2006 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2007 {
2008     return float64r32_addsub(a, b, status, true);
2009 }
2010
2011 static bfloat16 QEMU_FLATTEN
2012 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2013 {
2014     FloatParts64 pa, pb, *pr;
2015
2016     bfloat16_unpack_canonical(&pa, a, status);
2017     bfloat16_unpack_canonical(&pb, b, status);
2018     pr = parts_addsub(&pa, &pb, status, subtract);
2019
2020     return bfloat16_round_pack_canonical(pr, status);
2021 }
2022
2023 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2024 {
2025     return bfloat16_addsub(a, b, status, false);
2026 }
2027
2028 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2029 {
2030     return bfloat16_addsub(a, b, status, true);
2031 }
2032
2033 static float128 QEMU_FLATTEN
2034 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2035 {
2036     FloatParts128 pa, pb, *pr;
2037
2038     float128_unpack_canonical(&pa, a, status);
2039     float128_unpack_canonical(&pb, b, status);
2040     pr = parts_addsub(&pa, &pb, status, subtract);
2041
2042     return float128_round_pack_canonical(pr, status);
2043 }
2044
2045 float128 float128_add(float128 a, float128 b, float_status *status)
2046 {
2047     return float128_addsub(a, b, status, false);
2048 }
2049
2050 float128 float128_sub(float128 a, float128 b, float_status *status)
2051 {
2052     return float128_addsub(a, b, status, true);
2053 }
2054
2055 static floatx80 QEMU_FLATTEN
2056 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2057 {
2058     FloatParts128 pa, pb, *pr;
2059
2060     if (!floatx80_unpack_canonical(&pa, a, status) ||
2061         !floatx80_unpack_canonical(&pb, b, status)) {
2062         return floatx80_default_nan(status);
2063     }
2064
2065     pr = parts_addsub(&pa, &pb, status, subtract);
2066     return floatx80_round_pack_canonical(pr, status);
2067 }
2068
2069 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2070 {
2071     return floatx80_addsub(a, b, status, false);
2072 }
2073
2074 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2075 {
2076     return floatx80_addsub(a, b, status, true);
2077 }
2078
2079 /*
2080  * Multiplication
2081  */
2082
2083 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2084 {
2085     FloatParts64 pa, pb, *pr;
2086
2087     float16_unpack_canonical(&pa, a, status);
2088     float16_unpack_canonical(&pb, b, status);
2089     pr = parts_mul(&pa, &pb, status);
2090
2091     return float16_round_pack_canonical(pr, status);
2092 }
2093
2094 static float32 QEMU_SOFTFLOAT_ATTR
2095 soft_f32_mul(float32 a, float32 b, float_status *status)
2096 {
2097     FloatParts64 pa, pb, *pr;
2098
2099     float32_unpack_canonical(&pa, a, status);
2100     float32_unpack_canonical(&pb, b, status);
2101     pr = parts_mul(&pa, &pb, status);
2102
2103     return float32_round_pack_canonical(pr, status);
2104 }
2105
2106 static float64 QEMU_SOFTFLOAT_ATTR
2107 soft_f64_mul(float64 a, float64 b, float_status *status)
2108 {
2109     FloatParts64 pa, pb, *pr;
2110
2111     float64_unpack_canonical(&pa, a, status);
2112     float64_unpack_canonical(&pb, b, status);
2113     pr = parts_mul(&pa, &pb, status);
2114
2115     return float64_round_pack_canonical(pr, status);
2116 }
2117
2118 static float hard_f32_mul(float a, float b)
2119 {
2120     return a * b;
2121 }
2122
2123 static double hard_f64_mul(double a, double b)
2124 {
2125     return a * b;
2126 }
2127
2128 float32 QEMU_FLATTEN
2129 float32_mul(float32 a, float32 b, float_status *s)
2130 {
2131     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2132                         f32_is_zon2, f32_addsubmul_post);
2133 }
2134
2135 float64 QEMU_FLATTEN
2136 float64_mul(float64 a, float64 b, float_status *s)
2137 {
2138     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2139                         f64_is_zon2, f64_addsubmul_post);
2140 }
2141
2142 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2143 {
2144     FloatParts64 pa, pb, *pr;
2145
2146     float64_unpack_canonical(&pa, a, status);
2147     float64_unpack_canonical(&pb, b, status);
2148     pr = parts_mul(&pa, &pb, status);
2149
2150     return float64r32_round_pack_canonical(pr, status);
2151 }
2152
2153 bfloat16 QEMU_FLATTEN
2154 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2155 {
2156     FloatParts64 pa, pb, *pr;
2157
2158     bfloat16_unpack_canonical(&pa, a, status);
2159     bfloat16_unpack_canonical(&pb, b, status);
2160     pr = parts_mul(&pa, &pb, status);
2161
2162     return bfloat16_round_pack_canonical(pr, status);
2163 }
2164
2165 float128 QEMU_FLATTEN
2166 float128_mul(float128 a, float128 b, float_status *status)
2167 {
2168     FloatParts128 pa, pb, *pr;
2169
2170     float128_unpack_canonical(&pa, a, status);
2171     float128_unpack_canonical(&pb, b, status);
2172     pr = parts_mul(&pa, &pb, status);
2173
2174     return float128_round_pack_canonical(pr, status);
2175 }
2176
2177 floatx80 QEMU_FLATTEN
2178 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2179 {
2180     FloatParts128 pa, pb, *pr;
2181
2182     if (!floatx80_unpack_canonical(&pa, a, status) ||
2183         !floatx80_unpack_canonical(&pb, b, status)) {
2184         return floatx80_default_nan(status);
2185     }
2186
2187     pr = parts_mul(&pa, &pb, status);
2188     return floatx80_round_pack_canonical(pr, status);
2189 }
2190
2191 /*
2192  * Fused multiply-add
2193  */
2194
2195 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2196                                     int flags, float_status *status)
2197 {
2198     FloatParts64 pa, pb, pc, *pr;
2199
2200     float16_unpack_canonical(&pa, a, status);
2201     float16_unpack_canonical(&pb, b, status);
2202     float16_unpack_canonical(&pc, c, status);
2203     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2204
2205     return float16_round_pack_canonical(pr, status);
2206 }
2207
2208 static float32 QEMU_SOFTFLOAT_ATTR
2209 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2210                 float_status *status)
2211 {
2212     FloatParts64 pa, pb, pc, *pr;
2213
2214     float32_unpack_canonical(&pa, a, status);
2215     float32_unpack_canonical(&pb, b, status);
2216     float32_unpack_canonical(&pc, c, status);
2217     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2218
2219     return float32_round_pack_canonical(pr, status);
2220 }
2221
2222 static float64 QEMU_SOFTFLOAT_ATTR
2223 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2224                 float_status *status)
2225 {
2226     FloatParts64 pa, pb, pc, *pr;
2227
2228     float64_unpack_canonical(&pa, a, status);
2229     float64_unpack_canonical(&pb, b, status);
2230     float64_unpack_canonical(&pc, c, status);
2231     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2232
2233     return float64_round_pack_canonical(pr, status);
2234 }
2235
2236 static bool force_soft_fma;
2237
2238 float32 QEMU_FLATTEN
2239 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2240 {
2241     union_float32 ua, ub, uc, ur;
2242
2243     ua.s = xa;
2244     ub.s = xb;
2245     uc.s = xc;
2246
2247     if (unlikely(!can_use_fpu(s))) {
2248         goto soft;
2249     }
2250     if (unlikely(flags & float_muladd_halve_result)) {
2251         goto soft;
2252     }
2253
2254     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2255     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2256         goto soft;
2257     }
2258
2259     if (unlikely(force_soft_fma)) {
2260         goto soft;
2261     }
2262
2263     /*
2264      * When (a || b) == 0, there's no need to check for under/over flow,
2265      * since we know the addend is (normal || 0) and the product is 0.
2266      */
2267     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2268         union_float32 up;
2269         bool prod_sign;
2270
2271         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2272         prod_sign ^= !!(flags & float_muladd_negate_product);
2273         up.s = float32_set_sign(float32_zero, prod_sign);
2274
2275         if (flags & float_muladd_negate_c) {
2276             uc.h = -uc.h;
2277         }
2278         ur.h = up.h + uc.h;
2279     } else {
2280         union_float32 ua_orig = ua;
2281         union_float32 uc_orig = uc;
2282
2283         if (flags & float_muladd_negate_product) {
2284             ua.h = -ua.h;
2285         }
2286         if (flags & float_muladd_negate_c) {
2287             uc.h = -uc.h;
2288         }
2289
2290         ur.h = fmaf(ua.h, ub.h, uc.h);
2291
2292         if (unlikely(f32_is_inf(ur))) {
2293             float_raise(float_flag_overflow, s);
2294         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2295             ua = ua_orig;
2296             uc = uc_orig;
2297             goto soft;
2298         }
2299     }
2300     if (flags & float_muladd_negate_result) {
2301         return float32_chs(ur.s);
2302     }
2303     return ur.s;
2304
2305  soft:
2306     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2307 }
2308
2309 float64 QEMU_FLATTEN
2310 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2311 {
2312     union_float64 ua, ub, uc, ur;
2313
2314     ua.s = xa;
2315     ub.s = xb;
2316     uc.s = xc;
2317
2318     if (unlikely(!can_use_fpu(s))) {
2319         goto soft;
2320     }
2321     if (unlikely(flags & float_muladd_halve_result)) {
2322         goto soft;
2323     }
2324
2325     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2326     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2327         goto soft;
2328     }
2329
2330     if (unlikely(force_soft_fma)) {
2331         goto soft;
2332     }
2333
2334     /*
2335      * When (a || b) == 0, there's no need to check for under/over flow,
2336      * since we know the addend is (normal || 0) and the product is 0.
2337      */
2338     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2339         union_float64 up;
2340         bool prod_sign;
2341
2342         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2343         prod_sign ^= !!(flags & float_muladd_negate_product);
2344         up.s = float64_set_sign(float64_zero, prod_sign);
2345
2346         if (flags & float_muladd_negate_c) {
2347             uc.h = -uc.h;
2348         }
2349         ur.h = up.h + uc.h;
2350     } else {
2351         union_float64 ua_orig = ua;
2352         union_float64 uc_orig = uc;
2353
2354         if (flags & float_muladd_negate_product) {
2355             ua.h = -ua.h;
2356         }
2357         if (flags & float_muladd_negate_c) {
2358             uc.h = -uc.h;
2359         }
2360
2361         ur.h = fma(ua.h, ub.h, uc.h);
2362
2363         if (unlikely(f64_is_inf(ur))) {
2364             float_raise(float_flag_overflow, s);
2365         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2366             ua = ua_orig;
2367             uc = uc_orig;
2368             goto soft;
2369         }
2370     }
2371     if (flags & float_muladd_negate_result) {
2372         return float64_chs(ur.s);
2373     }
2374     return ur.s;
2375
2376  soft:
2377     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2378 }
2379
2380 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2381                           int flags, float_status *status)
2382 {
2383     FloatParts64 pa, pb, pc, *pr;
2384
2385     float64_unpack_canonical(&pa, a, status);
2386     float64_unpack_canonical(&pb, b, status);
2387     float64_unpack_canonical(&pc, c, status);
2388     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2389
2390     return float64r32_round_pack_canonical(pr, status);
2391 }
2392
2393 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2394                                       int flags, float_status *status)
2395 {
2396     FloatParts64 pa, pb, pc, *pr;
2397
2398     bfloat16_unpack_canonical(&pa, a, status);
2399     bfloat16_unpack_canonical(&pb, b, status);
2400     bfloat16_unpack_canonical(&pc, c, status);
2401     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2402
2403     return bfloat16_round_pack_canonical(pr, status);
2404 }
2405
2406 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2407                                       int flags, float_status *status)
2408 {
2409     FloatParts128 pa, pb, pc, *pr;
2410
2411     float128_unpack_canonical(&pa, a, status);
2412     float128_unpack_canonical(&pb, b, status);
2413     float128_unpack_canonical(&pc, c, status);
2414     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2415
2416     return float128_round_pack_canonical(pr, status);
2417 }
2418
2419 /*
2420  * Division
2421  */
2422
2423 float16 float16_div(float16 a, float16 b, float_status *status)
2424 {
2425     FloatParts64 pa, pb, *pr;
2426
2427     float16_unpack_canonical(&pa, a, status);
2428     float16_unpack_canonical(&pb, b, status);
2429     pr = parts_div(&pa, &pb, status);
2430
2431     return float16_round_pack_canonical(pr, status);
2432 }
2433
2434 static float32 QEMU_SOFTFLOAT_ATTR
2435 soft_f32_div(float32 a, float32 b, float_status *status)
2436 {
2437     FloatParts64 pa, pb, *pr;
2438
2439     float32_unpack_canonical(&pa, a, status);
2440     float32_unpack_canonical(&pb, b, status);
2441     pr = parts_div(&pa, &pb, status);
2442
2443     return float32_round_pack_canonical(pr, status);
2444 }
2445
2446 static float64 QEMU_SOFTFLOAT_ATTR
2447 soft_f64_div(float64 a, float64 b, float_status *status)
2448 {
2449     FloatParts64 pa, pb, *pr;
2450
2451     float64_unpack_canonical(&pa, a, status);
2452     float64_unpack_canonical(&pb, b, status);
2453     pr = parts_div(&pa, &pb, status);
2454
2455     return float64_round_pack_canonical(pr, status);
2456 }
2457
2458 static float hard_f32_div(float a, float b)
2459 {
2460     return a / b;
2461 }
2462
2463 static double hard_f64_div(double a, double b)
2464 {
2465     return a / b;
2466 }
2467
2468 static bool f32_div_pre(union_float32 a, union_float32 b)
2469 {
2470     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2471         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2472                fpclassify(b.h) == FP_NORMAL;
2473     }
2474     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2475 }
2476
2477 static bool f64_div_pre(union_float64 a, union_float64 b)
2478 {
2479     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2480         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2481                fpclassify(b.h) == FP_NORMAL;
2482     }
2483     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2484 }
2485
2486 static bool f32_div_post(union_float32 a, union_float32 b)
2487 {
2488     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2489         return fpclassify(a.h) != FP_ZERO;
2490     }
2491     return !float32_is_zero(a.s);
2492 }
2493
2494 static bool f64_div_post(union_float64 a, union_float64 b)
2495 {
2496     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2497         return fpclassify(a.h) != FP_ZERO;
2498     }
2499     return !float64_is_zero(a.s);
2500 }
2501
2502 float32 QEMU_FLATTEN
2503 float32_div(float32 a, float32 b, float_status *s)
2504 {
2505     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2506                         f32_div_pre, f32_div_post);
2507 }
2508
2509 float64 QEMU_FLATTEN
2510 float64_div(float64 a, float64 b, float_status *s)
2511 {
2512     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2513                         f64_div_pre, f64_div_post);
2514 }
2515
2516 float64 float64r32_div(float64 a, float64 b, float_status *status)
2517 {
2518     FloatParts64 pa, pb, *pr;
2519
2520     float64_unpack_canonical(&pa, a, status);
2521     float64_unpack_canonical(&pb, b, status);
2522     pr = parts_div(&pa, &pb, status);
2523
2524     return float64r32_round_pack_canonical(pr, status);
2525 }
2526
2527 bfloat16 QEMU_FLATTEN
2528 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2529 {
2530     FloatParts64 pa, pb, *pr;
2531
2532     bfloat16_unpack_canonical(&pa, a, status);
2533     bfloat16_unpack_canonical(&pb, b, status);
2534     pr = parts_div(&pa, &pb, status);
2535
2536     return bfloat16_round_pack_canonical(pr, status);
2537 }
2538
2539 float128 QEMU_FLATTEN
2540 float128_div(float128 a, float128 b, float_status *status)
2541 {
2542     FloatParts128 pa, pb, *pr;
2543
2544     float128_unpack_canonical(&pa, a, status);
2545     float128_unpack_canonical(&pb, b, status);
2546     pr = parts_div(&pa, &pb, status);
2547
2548     return float128_round_pack_canonical(pr, status);
2549 }
2550
2551 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2552 {
2553     FloatParts128 pa, pb, *pr;
2554
2555     if (!floatx80_unpack_canonical(&pa, a, status) ||
2556         !floatx80_unpack_canonical(&pb, b, status)) {
2557         return floatx80_default_nan(status);
2558     }
2559
2560     pr = parts_div(&pa, &pb, status);
2561     return floatx80_round_pack_canonical(pr, status);
2562 }
2563
2564 /*
2565  * Remainder
2566  */
2567
2568 float32 float32_rem(float32 a, float32 b, float_status *status)
2569 {
2570     FloatParts64 pa, pb, *pr;
2571
2572     float32_unpack_canonical(&pa, a, status);
2573     float32_unpack_canonical(&pb, b, status);
2574     pr = parts_modrem(&pa, &pb, NULL, status);
2575
2576     return float32_round_pack_canonical(pr, status);
2577 }
2578
2579 float64 float64_rem(float64 a, float64 b, float_status *status)
2580 {
2581     FloatParts64 pa, pb, *pr;
2582
2583     float64_unpack_canonical(&pa, a, status);
2584     float64_unpack_canonical(&pb, b, status);
2585     pr = parts_modrem(&pa, &pb, NULL, status);
2586
2587     return float64_round_pack_canonical(pr, status);
2588 }
2589
2590 float128 float128_rem(float128 a, float128 b, float_status *status)
2591 {
2592     FloatParts128 pa, pb, *pr;
2593
2594     float128_unpack_canonical(&pa, a, status);
2595     float128_unpack_canonical(&pb, b, status);
2596     pr = parts_modrem(&pa, &pb, NULL, status);
2597
2598     return float128_round_pack_canonical(pr, status);
2599 }
2600
2601 /*
2602  * Returns the remainder of the extended double-precision floating-point value
2603  * `a' with respect to the corresponding value `b'.
2604  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2605  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2606  * the remainder based on truncating the quotient toward zero instead and
2607  * *quotient is set to the low 64 bits of the absolute value of the integer
2608  * quotient.
2609  */
2610 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2611                          uint64_t *quotient, float_status *status)
2612 {
2613     FloatParts128 pa, pb, *pr;
2614
2615     *quotient = 0;
2616     if (!floatx80_unpack_canonical(&pa, a, status) ||
2617         !floatx80_unpack_canonical(&pb, b, status)) {
2618         return floatx80_default_nan(status);
2619     }
2620     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2621
2622     return floatx80_round_pack_canonical(pr, status);
2623 }
2624
2625 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2626 {
2627     uint64_t quotient;
2628     return floatx80_modrem(a, b, false, &quotient, status);
2629 }
2630
2631 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2632 {
2633     uint64_t quotient;
2634     return floatx80_modrem(a, b, true, &quotient, status);
2635 }
2636
2637 /*
2638  * Float to Float conversions
2639  *
2640  * Returns the result of converting one float format to another. The
2641  * conversion is performed according to the IEC/IEEE Standard for
2642  * Binary Floating-Point Arithmetic.
2643  *
2644  * Usually this only needs to take care of raising invalid exceptions
2645  * and handling the conversion on NaNs.
2646  */
2647
2648 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2649 {
2650     switch (a->cls) {
2651     case float_class_snan:
2652         float_raise(float_flag_invalid_snan, s);
2653         /* fall through */
2654     case float_class_qnan:
2655         /*
2656          * There is no NaN in the destination format.  Raise Invalid
2657          * and return a zero with the sign of the input NaN.
2658          */
2659         float_raise(float_flag_invalid, s);
2660         a->cls = float_class_zero;
2661         break;
2662
2663     case float_class_inf:
2664         /*
2665          * There is no Inf in the destination format.  Raise Invalid
2666          * and return the maximum normal with the correct sign.
2667          */
2668         float_raise(float_flag_invalid, s);
2669         a->cls = float_class_normal;
2670         a->exp = float16_params_ahp.exp_max;
2671         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2672                                   float16_params_ahp.frac_size + 1);
2673         break;
2674
2675     case float_class_normal:
2676     case float_class_zero:
2677         break;
2678
2679     default:
2680         g_assert_not_reached();
2681     }
2682 }
2683
2684 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2685 {
2686     if (is_nan(a->cls)) {
2687         parts_return_nan(a, s);
2688     }
2689 }
2690
2691 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2692 {
2693     if (is_nan(a->cls)) {
2694         parts_return_nan(a, s);
2695     }
2696 }
2697
2698 #define parts_float_to_float(P, S) \
2699     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2700
2701 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2702                                         float_status *s)
2703 {
2704     a->cls = b->cls;
2705     a->sign = b->sign;
2706     a->exp = b->exp;
2707
2708     if (a->cls == float_class_normal) {
2709         frac_truncjam(a, b);
2710     } else if (is_nan(a->cls)) {
2711         /* Discard the low bits of the NaN. */
2712         a->frac = b->frac_hi;
2713         parts_return_nan(a, s);
2714     }
2715 }
2716
2717 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2718                                        float_status *s)
2719 {
2720     a->cls = b->cls;
2721     a->sign = b->sign;
2722     a->exp = b->exp;
2723     frac_widen(a, b);
2724
2725     if (is_nan(a->cls)) {
2726         parts_return_nan(a, s);
2727     }
2728 }
2729
2730 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2731 {
2732     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2733     FloatParts64 p;
2734
2735     float16a_unpack_canonical(&p, a, s, fmt16);
2736     parts_float_to_float(&p, s);
2737     return float32_round_pack_canonical(&p, s);
2738 }
2739
2740 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2741 {
2742     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2743     FloatParts64 p;
2744
2745     float16a_unpack_canonical(&p, a, s, fmt16);
2746     parts_float_to_float(&p, s);
2747     return float64_round_pack_canonical(&p, s);
2748 }
2749
2750 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2751 {
2752     FloatParts64 p;
2753     const FloatFmt *fmt;
2754
2755     float32_unpack_canonical(&p, a, s);
2756     if (ieee) {
2757         parts_float_to_float(&p, s);
2758         fmt = &float16_params;
2759     } else {
2760         parts_float_to_ahp(&p, s);
2761         fmt = &float16_params_ahp;
2762     }
2763     return float16a_round_pack_canonical(&p, s, fmt);
2764 }
2765
2766 static float64 QEMU_SOFTFLOAT_ATTR
2767 soft_float32_to_float64(float32 a, float_status *s)
2768 {
2769     FloatParts64 p;
2770
2771     float32_unpack_canonical(&p, a, s);
2772     parts_float_to_float(&p, s);
2773     return float64_round_pack_canonical(&p, s);
2774 }
2775
2776 float64 float32_to_float64(float32 a, float_status *s)
2777 {
2778     if (likely(float32_is_normal(a))) {
2779         /* Widening conversion can never produce inexact results.  */
2780         union_float32 uf;
2781         union_float64 ud;
2782         uf.s = a;
2783         ud.h = uf.h;
2784         return ud.s;
2785     } else if (float32_is_zero(a)) {
2786         return float64_set_sign(float64_zero, float32_is_neg(a));
2787     } else {
2788         return soft_float32_to_float64(a, s);
2789     }
2790 }
2791
2792 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2793 {
2794     FloatParts64 p;
2795     const FloatFmt *fmt;
2796
2797     float64_unpack_canonical(&p, a, s);
2798     if (ieee) {
2799         parts_float_to_float(&p, s);
2800         fmt = &float16_params;
2801     } else {
2802         parts_float_to_ahp(&p, s);
2803         fmt = &float16_params_ahp;
2804     }
2805     return float16a_round_pack_canonical(&p, s, fmt);
2806 }
2807
2808 float32 float64_to_float32(float64 a, float_status *s)
2809 {
2810     FloatParts64 p;
2811
2812     float64_unpack_canonical(&p, a, s);
2813     parts_float_to_float(&p, s);
2814     return float32_round_pack_canonical(&p, s);
2815 }
2816
2817 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2818 {
2819     FloatParts64 p;
2820
2821     bfloat16_unpack_canonical(&p, a, s);
2822     parts_float_to_float(&p, s);
2823     return float32_round_pack_canonical(&p, s);
2824 }
2825
2826 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2827 {
2828     FloatParts64 p;
2829
2830     bfloat16_unpack_canonical(&p, a, s);
2831     parts_float_to_float(&p, s);
2832     return float64_round_pack_canonical(&p, s);
2833 }
2834
2835 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2836 {
2837     FloatParts64 p;
2838
2839     float32_unpack_canonical(&p, a, s);
2840     parts_float_to_float(&p, s);
2841     return bfloat16_round_pack_canonical(&p, s);
2842 }
2843
2844 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2845 {
2846     FloatParts64 p;
2847
2848     float64_unpack_canonical(&p, a, s);
2849     parts_float_to_float(&p, s);
2850     return bfloat16_round_pack_canonical(&p, s);
2851 }
2852
2853 float32 float128_to_float32(float128 a, float_status *s)
2854 {
2855     FloatParts64 p64;
2856     FloatParts128 p128;
2857
2858     float128_unpack_canonical(&p128, a, s);
2859     parts_float_to_float_narrow(&p64, &p128, s);
2860     return float32_round_pack_canonical(&p64, s);
2861 }
2862
2863 float64 float128_to_float64(float128 a, float_status *s)
2864 {
2865     FloatParts64 p64;
2866     FloatParts128 p128;
2867
2868     float128_unpack_canonical(&p128, a, s);
2869     parts_float_to_float_narrow(&p64, &p128, s);
2870     return float64_round_pack_canonical(&p64, s);
2871 }
2872
2873 float128 float32_to_float128(float32 a, float_status *s)
2874 {
2875     FloatParts64 p64;
2876     FloatParts128 p128;
2877
2878     float32_unpack_canonical(&p64, a, s);
2879     parts_float_to_float_widen(&p128, &p64, s);
2880     return float128_round_pack_canonical(&p128, s);
2881 }
2882
2883 float128 float64_to_float128(float64 a, float_status *s)
2884 {
2885     FloatParts64 p64;
2886     FloatParts128 p128;
2887
2888     float64_unpack_canonical(&p64, a, s);
2889     parts_float_to_float_widen(&p128, &p64, s);
2890     return float128_round_pack_canonical(&p128, s);
2891 }
2892
2893 float32 floatx80_to_float32(floatx80 a, float_status *s)
2894 {
2895     FloatParts64 p64;
2896     FloatParts128 p128;
2897
2898     if (floatx80_unpack_canonical(&p128, a, s)) {
2899         parts_float_to_float_narrow(&p64, &p128, s);
2900     } else {
2901         parts_default_nan(&p64, s);
2902     }
2903     return float32_round_pack_canonical(&p64, s);
2904 }
2905
2906 float64 floatx80_to_float64(floatx80 a, float_status *s)
2907 {
2908     FloatParts64 p64;
2909     FloatParts128 p128;
2910
2911     if (floatx80_unpack_canonical(&p128, a, s)) {
2912         parts_float_to_float_narrow(&p64, &p128, s);
2913     } else {
2914         parts_default_nan(&p64, s);
2915     }
2916     return float64_round_pack_canonical(&p64, s);
2917 }
2918
2919 float128 floatx80_to_float128(floatx80 a, float_status *s)
2920 {
2921     FloatParts128 p;
2922
2923     if (floatx80_unpack_canonical(&p, a, s)) {
2924         parts_float_to_float(&p, s);
2925     } else {
2926         parts_default_nan(&p, s);
2927     }
2928     return float128_round_pack_canonical(&p, s);
2929 }
2930
2931 floatx80 float32_to_floatx80(float32 a, float_status *s)
2932 {
2933     FloatParts64 p64;
2934     FloatParts128 p128;
2935
2936     float32_unpack_canonical(&p64, a, s);
2937     parts_float_to_float_widen(&p128, &p64, s);
2938     return floatx80_round_pack_canonical(&p128, s);
2939 }
2940
2941 floatx80 float64_to_floatx80(float64 a, float_status *s)
2942 {
2943     FloatParts64 p64;
2944     FloatParts128 p128;
2945
2946     float64_unpack_canonical(&p64, a, s);
2947     parts_float_to_float_widen(&p128, &p64, s);
2948     return floatx80_round_pack_canonical(&p128, s);
2949 }
2950
2951 floatx80 float128_to_floatx80(float128 a, float_status *s)
2952 {
2953     FloatParts128 p;
2954
2955     float128_unpack_canonical(&p, a, s);
2956     parts_float_to_float(&p, s);
2957     return floatx80_round_pack_canonical(&p, s);
2958 }
2959
2960 /*
2961  * Round to integral value
2962  */
2963
2964 float16 float16_round_to_int(float16 a, float_status *s)
2965 {
2966     FloatParts64 p;
2967
2968     float16_unpack_canonical(&p, a, s);
2969     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2970     return float16_round_pack_canonical(&p, s);
2971 }
2972
2973 float32 float32_round_to_int(float32 a, float_status *s)
2974 {
2975     FloatParts64 p;
2976
2977     float32_unpack_canonical(&p, a, s);
2978     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2979     return float32_round_pack_canonical(&p, s);
2980 }
2981
2982 float64 float64_round_to_int(float64 a, float_status *s)
2983 {
2984     FloatParts64 p;
2985
2986     float64_unpack_canonical(&p, a, s);
2987     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2988     return float64_round_pack_canonical(&p, s);
2989 }
2990
2991 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2992 {
2993     FloatParts64 p;
2994
2995     bfloat16_unpack_canonical(&p, a, s);
2996     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2997     return bfloat16_round_pack_canonical(&p, s);
2998 }
2999
3000 float128 float128_round_to_int(float128 a, float_status *s)
3001 {
3002     FloatParts128 p;
3003
3004     float128_unpack_canonical(&p, a, s);
3005     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3006     return float128_round_pack_canonical(&p, s);
3007 }
3008
3009 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3010 {
3011     FloatParts128 p;
3012
3013     if (!floatx80_unpack_canonical(&p, a, status)) {
3014         return floatx80_default_nan(status);
3015     }
3016
3017     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3018                        &floatx80_params[status->floatx80_rounding_precision]);
3019     return floatx80_round_pack_canonical(&p, status);
3020 }
3021
3022 /*
3023  * Floating-point to signed integer conversions
3024  */
3025
3026 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3027                               float_status *s)
3028 {
3029     FloatParts64 p;
3030
3031     float16_unpack_canonical(&p, a, s);
3032     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3033 }
3034
3035 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3036                                 float_status *s)
3037 {
3038     FloatParts64 p;
3039
3040     float16_unpack_canonical(&p, a, s);
3041     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3042 }
3043
3044 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3045                                 float_status *s)
3046 {
3047     FloatParts64 p;
3048
3049     float16_unpack_canonical(&p, a, s);
3050     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3051 }
3052
3053 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3054                                 float_status *s)
3055 {
3056     FloatParts64 p;
3057
3058     float16_unpack_canonical(&p, a, s);
3059     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3060 }
3061
3062 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3063                                 float_status *s)
3064 {
3065     FloatParts64 p;
3066
3067     float32_unpack_canonical(&p, a, s);
3068     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3069 }
3070
3071 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3072                                 float_status *s)
3073 {
3074     FloatParts64 p;
3075
3076     float32_unpack_canonical(&p, a, s);
3077     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3078 }
3079
3080 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3081                                 float_status *s)
3082 {
3083     FloatParts64 p;
3084
3085     float32_unpack_canonical(&p, a, s);
3086     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3087 }
3088
3089 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3090                                 float_status *s)
3091 {
3092     FloatParts64 p;
3093
3094     float64_unpack_canonical(&p, a, s);
3095     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3096 }
3097
3098 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3099                                 float_status *s)
3100 {
3101     FloatParts64 p;
3102
3103     float64_unpack_canonical(&p, a, s);
3104     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3105 }
3106
3107 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3108                                 float_status *s)
3109 {
3110     FloatParts64 p;
3111
3112     float64_unpack_canonical(&p, a, s);
3113     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3114 }
3115
3116 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3117                                  float_status *s)
3118 {
3119     FloatParts64 p;
3120
3121     bfloat16_unpack_canonical(&p, a, s);
3122     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3123 }
3124
3125 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3126                                  float_status *s)
3127 {
3128     FloatParts64 p;
3129
3130     bfloat16_unpack_canonical(&p, a, s);
3131     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3132 }
3133
3134 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3135                                  float_status *s)
3136 {
3137     FloatParts64 p;
3138
3139     bfloat16_unpack_canonical(&p, a, s);
3140     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3141 }
3142
3143 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3144                                         int scale, float_status *s)
3145 {
3146     FloatParts128 p;
3147
3148     float128_unpack_canonical(&p, a, s);
3149     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3150 }
3151
3152 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3153                                         int scale, float_status *s)
3154 {
3155     FloatParts128 p;
3156
3157     float128_unpack_canonical(&p, a, s);
3158     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3159 }
3160
3161 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3162                                         int scale, float_status *s)
3163 {
3164     int flags = 0;
3165     Int128 r;
3166     FloatParts128 p;
3167
3168     float128_unpack_canonical(&p, a, s);
3169
3170     switch (p.cls) {
3171     case float_class_snan:
3172         flags |= float_flag_invalid_snan;
3173         /* fall through */
3174     case float_class_qnan:
3175         flags |= float_flag_invalid;
3176         r = UINT128_MAX;
3177         break;
3178
3179     case float_class_inf:
3180         flags = float_flag_invalid | float_flag_invalid_cvti;
3181         r = p.sign ? INT128_MIN : INT128_MAX;
3182         break;
3183
3184     case float_class_zero:
3185         return int128_zero();
3186
3187     case float_class_normal:
3188         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3189             flags = float_flag_inexact;
3190         }
3191
3192         if (p.exp < 127) {
3193             int shift = 127 - p.exp;
3194             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3195             if (p.sign) {
3196                 r = int128_neg(r);
3197             }
3198         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3199                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3200             r = INT128_MIN;
3201         } else {
3202             flags = float_flag_invalid | float_flag_invalid_cvti;
3203             r = p.sign ? INT128_MIN : INT128_MAX;
3204         }
3205         break;
3206
3207     default:
3208         g_assert_not_reached();
3209     }
3210
3211     float_raise(flags, s);
3212     return r;
3213 }
3214
3215 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3216                                         int scale, float_status *s)
3217 {
3218     FloatParts128 p;
3219
3220     if (!floatx80_unpack_canonical(&p, a, s)) {
3221         parts_default_nan(&p, s);
3222     }
3223     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3224 }
3225
3226 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3227                                         int scale, float_status *s)
3228 {
3229     FloatParts128 p;
3230
3231     if (!floatx80_unpack_canonical(&p, a, s)) {
3232         parts_default_nan(&p, s);
3233     }
3234     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3235 }
3236
3237 int8_t float16_to_int8(float16 a, float_status *s)
3238 {
3239     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3240 }
3241
3242 int16_t float16_to_int16(float16 a, float_status *s)
3243 {
3244     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3245 }
3246
3247 int32_t float16_to_int32(float16 a, float_status *s)
3248 {
3249     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3250 }
3251
3252 int64_t float16_to_int64(float16 a, float_status *s)
3253 {
3254     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3255 }
3256
3257 int16_t float32_to_int16(float32 a, float_status *s)
3258 {
3259     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3260 }
3261
3262 int32_t float32_to_int32(float32 a, float_status *s)
3263 {
3264     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3265 }
3266
3267 int64_t float32_to_int64(float32 a, float_status *s)
3268 {
3269     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3270 }
3271
3272 int16_t float64_to_int16(float64 a, float_status *s)
3273 {
3274     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3275 }
3276
3277 int32_t float64_to_int32(float64 a, float_status *s)
3278 {
3279     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3280 }
3281
3282 int64_t float64_to_int64(float64 a, float_status *s)
3283 {
3284     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3285 }
3286
3287 int32_t float128_to_int32(float128 a, float_status *s)
3288 {
3289     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3290 }
3291
3292 int64_t float128_to_int64(float128 a, float_status *s)
3293 {
3294     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3295 }
3296
3297 Int128 float128_to_int128(float128 a, float_status *s)
3298 {
3299     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3300 }
3301
3302 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3303 {
3304     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3305 }
3306
3307 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3308 {
3309     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3310 }
3311
3312 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3313 {
3314     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3315 }
3316
3317 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3318 {
3319     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3320 }
3321
3322 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3323 {
3324     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3325 }
3326
3327 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3328 {
3329     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3330 }
3331
3332 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3333 {
3334     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3335 }
3336
3337 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3338 {
3339     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3340 }
3341
3342 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3343 {
3344     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3345 }
3346
3347 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3348 {
3349     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3350 }
3351
3352 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3353 {
3354     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3355 }
3356
3357 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3358 {
3359     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3360 }
3361
3362 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3363 {
3364     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3365 }
3366
3367 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3368 {
3369     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3370 }
3371
3372 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3373 {
3374     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3375 }
3376
3377 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3378 {
3379     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3380 }
3381
3382 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3383 {
3384     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3385 }
3386
3387 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3388 {
3389     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3390 }
3391
3392 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3393 {
3394     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3395 }
3396
3397 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3398 {
3399     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3400 }
3401
3402 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3403 {
3404     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3405 }
3406
3407 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3408 {
3409     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3410 }
3411
3412 /*
3413  * Floating-point to unsigned integer conversions
3414  */
3415
3416 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3417                                 float_status *s)
3418 {
3419     FloatParts64 p;
3420
3421     float16_unpack_canonical(&p, a, s);
3422     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3423 }
3424
3425 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3426                                   float_status *s)
3427 {
3428     FloatParts64 p;
3429
3430     float16_unpack_canonical(&p, a, s);
3431     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3432 }
3433
3434 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3435                                   float_status *s)
3436 {
3437     FloatParts64 p;
3438
3439     float16_unpack_canonical(&p, a, s);
3440     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3441 }
3442
3443 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3444                                   float_status *s)
3445 {
3446     FloatParts64 p;
3447
3448     float16_unpack_canonical(&p, a, s);
3449     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3450 }
3451
3452 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3453                                   float_status *s)
3454 {
3455     FloatParts64 p;
3456
3457     float32_unpack_canonical(&p, a, s);
3458     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3459 }
3460
3461 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3462                                   float_status *s)
3463 {
3464     FloatParts64 p;
3465
3466     float32_unpack_canonical(&p, a, s);
3467     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3468 }
3469
3470 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3471                                   float_status *s)
3472 {
3473     FloatParts64 p;
3474
3475     float32_unpack_canonical(&p, a, s);
3476     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3477 }
3478
3479 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3480                                   float_status *s)
3481 {
3482     FloatParts64 p;
3483
3484     float64_unpack_canonical(&p, a, s);
3485     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3486 }
3487
3488 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3489                                   float_status *s)
3490 {
3491     FloatParts64 p;
3492
3493     float64_unpack_canonical(&p, a, s);
3494     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3495 }
3496
3497 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3498                                   float_status *s)
3499 {
3500     FloatParts64 p;
3501
3502     float64_unpack_canonical(&p, a, s);
3503     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3504 }
3505
3506 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3507                                    int scale, float_status *s)
3508 {
3509     FloatParts64 p;
3510
3511     bfloat16_unpack_canonical(&p, a, s);
3512     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3513 }
3514
3515 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3516                                    int scale, float_status *s)
3517 {
3518     FloatParts64 p;
3519
3520     bfloat16_unpack_canonical(&p, a, s);
3521     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3522 }
3523
3524 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3525                                    int scale, float_status *s)
3526 {
3527     FloatParts64 p;
3528
3529     bfloat16_unpack_canonical(&p, a, s);
3530     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3531 }
3532
3533 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3534                                           int scale, float_status *s)
3535 {
3536     FloatParts128 p;
3537
3538     float128_unpack_canonical(&p, a, s);
3539     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3540 }
3541
3542 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3543                                           int scale, float_status *s)
3544 {
3545     FloatParts128 p;
3546
3547     float128_unpack_canonical(&p, a, s);
3548     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3549 }
3550
3551 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3552                                          int scale, float_status *s)
3553 {
3554     int flags = 0;
3555     Int128 r;
3556     FloatParts128 p;
3557
3558     float128_unpack_canonical(&p, a, s);
3559
3560     switch (p.cls) {
3561     case float_class_snan:
3562         flags |= float_flag_invalid_snan;
3563         /* fall through */
3564     case float_class_qnan:
3565         flags |= float_flag_invalid;
3566         r = UINT128_MAX;
3567         break;
3568
3569     case float_class_inf:
3570         flags = float_flag_invalid | float_flag_invalid_cvti;
3571         r = p.sign ? int128_zero() : UINT128_MAX;
3572         break;
3573
3574     case float_class_zero:
3575         return int128_zero();
3576
3577     case float_class_normal:
3578         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3579             flags = float_flag_inexact;
3580             if (p.cls == float_class_zero) {
3581                 r = int128_zero();
3582                 break;
3583             }
3584         }
3585
3586         if (p.sign) {
3587             flags = float_flag_invalid | float_flag_invalid_cvti;
3588             r = int128_zero();
3589         } else if (p.exp <= 127) {
3590             int shift = 127 - p.exp;
3591             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3592         } else {
3593             flags = float_flag_invalid | float_flag_invalid_cvti;
3594             r = UINT128_MAX;
3595         }
3596         break;
3597
3598     default:
3599         g_assert_not_reached();
3600     }
3601
3602     float_raise(flags, s);
3603     return r;
3604 }
3605
3606 uint8_t float16_to_uint8(float16 a, float_status *s)
3607 {
3608     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3609 }
3610
3611 uint16_t float16_to_uint16(float16 a, float_status *s)
3612 {
3613     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3614 }
3615
3616 uint32_t float16_to_uint32(float16 a, float_status *s)
3617 {
3618     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3619 }
3620
3621 uint64_t float16_to_uint64(float16 a, float_status *s)
3622 {
3623     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3624 }
3625
3626 uint16_t float32_to_uint16(float32 a, float_status *s)
3627 {
3628     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3629 }
3630
3631 uint32_t float32_to_uint32(float32 a, float_status *s)
3632 {
3633     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3634 }
3635
3636 uint64_t float32_to_uint64(float32 a, float_status *s)
3637 {
3638     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3639 }
3640
3641 uint16_t float64_to_uint16(float64 a, float_status *s)
3642 {
3643     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3644 }
3645
3646 uint32_t float64_to_uint32(float64 a, float_status *s)
3647 {
3648     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3649 }
3650
3651 uint64_t float64_to_uint64(float64 a, float_status *s)
3652 {
3653     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3654 }
3655
3656 uint32_t float128_to_uint32(float128 a, float_status *s)
3657 {
3658     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3659 }
3660
3661 uint64_t float128_to_uint64(float128 a, float_status *s)
3662 {
3663     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3664 }
3665
3666 Int128 float128_to_uint128(float128 a, float_status *s)
3667 {
3668     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3669 }
3670
3671 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3672 {
3673     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3674 }
3675
3676 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3677 {
3678     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3679 }
3680
3681 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3682 {
3683     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3684 }
3685
3686 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3687 {
3688     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3689 }
3690
3691 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3692 {
3693     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3694 }
3695
3696 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3697 {
3698     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3699 }
3700
3701 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3702 {
3703     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3704 }
3705
3706 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3707 {
3708     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3709 }
3710
3711 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3712 {
3713     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3714 }
3715
3716 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3717 {
3718     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3719 }
3720
3721 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3722 {
3723     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3724 }
3725
3726 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3727 {
3728     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3729 }
3730
3731 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3732 {
3733     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3734 }
3735
3736 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3737 {
3738     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3739 }
3740
3741 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3742 {
3743     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3744 }
3745
3746 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3747 {
3748     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3749 }
3750
3751 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3752 {
3753     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3754 }
3755
3756 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3757 {
3758     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3759 }
3760
3761 /*
3762  * Signed integer to floating-point conversions
3763  */
3764
3765 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3766 {
3767     FloatParts64 p;
3768
3769     parts_sint_to_float(&p, a, scale, status);
3770     return float16_round_pack_canonical(&p, status);
3771 }
3772
3773 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3774 {
3775     return int64_to_float16_scalbn(a, scale, status);
3776 }
3777
3778 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3779 {
3780     return int64_to_float16_scalbn(a, scale, status);
3781 }
3782
3783 float16 int64_to_float16(int64_t a, float_status *status)
3784 {
3785     return int64_to_float16_scalbn(a, 0, status);
3786 }
3787
3788 float16 int32_to_float16(int32_t a, float_status *status)
3789 {
3790     return int64_to_float16_scalbn(a, 0, status);
3791 }
3792
3793 float16 int16_to_float16(int16_t a, float_status *status)
3794 {
3795     return int64_to_float16_scalbn(a, 0, status);
3796 }
3797
3798 float16 int8_to_float16(int8_t a, float_status *status)
3799 {
3800     return int64_to_float16_scalbn(a, 0, status);
3801 }
3802
3803 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3804 {
3805     FloatParts64 p;
3806
3807     /* Without scaling, there are no overflow concerns. */
3808     if (likely(scale == 0) && can_use_fpu(status)) {
3809         union_float32 ur;
3810         ur.h = a;
3811         return ur.s;
3812     }
3813
3814     parts64_sint_to_float(&p, a, scale, status);
3815     return float32_round_pack_canonical(&p, status);
3816 }
3817
3818 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3819 {
3820     return int64_to_float32_scalbn(a, scale, status);
3821 }
3822
3823 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3824 {
3825     return int64_to_float32_scalbn(a, scale, status);
3826 }
3827
3828 float32 int64_to_float32(int64_t a, float_status *status)
3829 {
3830     return int64_to_float32_scalbn(a, 0, status);
3831 }
3832
3833 float32 int32_to_float32(int32_t a, float_status *status)
3834 {
3835     return int64_to_float32_scalbn(a, 0, status);
3836 }
3837
3838 float32 int16_to_float32(int16_t a, float_status *status)
3839 {
3840     return int64_to_float32_scalbn(a, 0, status);
3841 }
3842
3843 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3844 {
3845     FloatParts64 p;
3846
3847     /* Without scaling, there are no overflow concerns. */
3848     if (likely(scale == 0) && can_use_fpu(status)) {
3849         union_float64 ur;
3850         ur.h = a;
3851         return ur.s;
3852     }
3853
3854     parts_sint_to_float(&p, a, scale, status);
3855     return float64_round_pack_canonical(&p, status);
3856 }
3857
3858 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3859 {
3860     return int64_to_float64_scalbn(a, scale, status);
3861 }
3862
3863 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3864 {
3865     return int64_to_float64_scalbn(a, scale, status);
3866 }
3867
3868 float64 int64_to_float64(int64_t a, float_status *status)
3869 {
3870     return int64_to_float64_scalbn(a, 0, status);
3871 }
3872
3873 float64 int32_to_float64(int32_t a, float_status *status)
3874 {
3875     return int64_to_float64_scalbn(a, 0, status);
3876 }
3877
3878 float64 int16_to_float64(int16_t a, float_status *status)
3879 {
3880     return int64_to_float64_scalbn(a, 0, status);
3881 }
3882
3883 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3884 {
3885     FloatParts64 p;
3886
3887     parts_sint_to_float(&p, a, scale, status);
3888     return bfloat16_round_pack_canonical(&p, status);
3889 }
3890
3891 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3892 {
3893     return int64_to_bfloat16_scalbn(a, scale, status);
3894 }
3895
3896 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3897 {
3898     return int64_to_bfloat16_scalbn(a, scale, status);
3899 }
3900
3901 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3902 {
3903     return int64_to_bfloat16_scalbn(a, 0, status);
3904 }
3905
3906 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3907 {
3908     return int64_to_bfloat16_scalbn(a, 0, status);
3909 }
3910
3911 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3912 {
3913     return int64_to_bfloat16_scalbn(a, 0, status);
3914 }
3915
3916 float128 int128_to_float128(Int128 a, float_status *status)
3917 {
3918     FloatParts128 p = { };
3919     int shift;
3920
3921     if (int128_nz(a)) {
3922         p.cls = float_class_normal;
3923         if (!int128_nonneg(a)) {
3924             p.sign = true;
3925             a = int128_neg(a);
3926         }
3927
3928         shift = clz64(int128_gethi(a));
3929         if (shift == 64) {
3930             shift += clz64(int128_getlo(a));
3931         }
3932
3933         p.exp = 127 - shift;
3934         a = int128_lshift(a, shift);
3935
3936         p.frac_hi = int128_gethi(a);
3937         p.frac_lo = int128_getlo(a);
3938     } else {
3939         p.cls = float_class_zero;
3940     }
3941
3942     return float128_round_pack_canonical(&p, status);
3943 }
3944
3945 float128 int64_to_float128(int64_t a, float_status *status)
3946 {
3947     FloatParts128 p;
3948
3949     parts_sint_to_float(&p, a, 0, status);
3950     return float128_round_pack_canonical(&p, status);
3951 }
3952
3953 float128 int32_to_float128(int32_t a, float_status *status)
3954 {
3955     return int64_to_float128(a, status);
3956 }
3957
3958 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3959 {
3960     FloatParts128 p;
3961
3962     parts_sint_to_float(&p, a, 0, status);
3963     return floatx80_round_pack_canonical(&p, status);
3964 }
3965
3966 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3967 {
3968     return int64_to_floatx80(a, status);
3969 }
3970
3971 /*
3972  * Unsigned Integer to floating-point conversions
3973  */
3974
3975 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3976 {
3977     FloatParts64 p;
3978
3979     parts_uint_to_float(&p, a, scale, status);
3980     return float16_round_pack_canonical(&p, status);
3981 }
3982
3983 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3984 {
3985     return uint64_to_float16_scalbn(a, scale, status);
3986 }
3987
3988 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3989 {
3990     return uint64_to_float16_scalbn(a, scale, status);
3991 }
3992
3993 float16 uint64_to_float16(uint64_t a, float_status *status)
3994 {
3995     return uint64_to_float16_scalbn(a, 0, status);
3996 }
3997
3998 float16 uint32_to_float16(uint32_t a, float_status *status)
3999 {
4000     return uint64_to_float16_scalbn(a, 0, status);
4001 }
4002
4003 float16 uint16_to_float16(uint16_t a, float_status *status)
4004 {
4005     return uint64_to_float16_scalbn(a, 0, status);
4006 }
4007
4008 float16 uint8_to_float16(uint8_t a, float_status *status)
4009 {
4010     return uint64_to_float16_scalbn(a, 0, status);
4011 }
4012
4013 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4014 {
4015     FloatParts64 p;
4016
4017     /* Without scaling, there are no overflow concerns. */
4018     if (likely(scale == 0) && can_use_fpu(status)) {
4019         union_float32 ur;
4020         ur.h = a;
4021         return ur.s;
4022     }
4023
4024     parts_uint_to_float(&p, a, scale, status);
4025     return float32_round_pack_canonical(&p, status);
4026 }
4027
4028 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4029 {
4030     return uint64_to_float32_scalbn(a, scale, status);
4031 }
4032
4033 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4034 {
4035     return uint64_to_float32_scalbn(a, scale, status);
4036 }
4037
4038 float32 uint64_to_float32(uint64_t a, float_status *status)
4039 {
4040     return uint64_to_float32_scalbn(a, 0, status);
4041 }
4042
4043 float32 uint32_to_float32(uint32_t a, float_status *status)
4044 {
4045     return uint64_to_float32_scalbn(a, 0, status);
4046 }
4047
4048 float32 uint16_to_float32(uint16_t a, float_status *status)
4049 {
4050     return uint64_to_float32_scalbn(a, 0, status);
4051 }
4052
4053 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4054 {
4055     FloatParts64 p;
4056
4057     /* Without scaling, there are no overflow concerns. */
4058     if (likely(scale == 0) && can_use_fpu(status)) {
4059         union_float64 ur;
4060         ur.h = a;
4061         return ur.s;
4062     }
4063
4064     parts_uint_to_float(&p, a, scale, status);
4065     return float64_round_pack_canonical(&p, status);
4066 }
4067
4068 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4069 {
4070     return uint64_to_float64_scalbn(a, scale, status);
4071 }
4072
4073 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4074 {
4075     return uint64_to_float64_scalbn(a, scale, status);
4076 }
4077
4078 float64 uint64_to_float64(uint64_t a, float_status *status)
4079 {
4080     return uint64_to_float64_scalbn(a, 0, status);
4081 }
4082
4083 float64 uint32_to_float64(uint32_t a, float_status *status)
4084 {
4085     return uint64_to_float64_scalbn(a, 0, status);
4086 }
4087
4088 float64 uint16_to_float64(uint16_t a, float_status *status)
4089 {
4090     return uint64_to_float64_scalbn(a, 0, status);
4091 }
4092
4093 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4094 {
4095     FloatParts64 p;
4096
4097     parts_uint_to_float(&p, a, scale, status);
4098     return bfloat16_round_pack_canonical(&p, status);
4099 }
4100
4101 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4102 {
4103     return uint64_to_bfloat16_scalbn(a, scale, status);
4104 }
4105
4106 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4107 {
4108     return uint64_to_bfloat16_scalbn(a, scale, status);
4109 }
4110
4111 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4112 {
4113     return uint64_to_bfloat16_scalbn(a, 0, status);
4114 }
4115
4116 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4117 {
4118     return uint64_to_bfloat16_scalbn(a, 0, status);
4119 }
4120
4121 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4122 {
4123     return uint64_to_bfloat16_scalbn(a, 0, status);
4124 }
4125
4126 float128 uint64_to_float128(uint64_t a, float_status *status)
4127 {
4128     FloatParts128 p;
4129
4130     parts_uint_to_float(&p, a, 0, status);
4131     return float128_round_pack_canonical(&p, status);
4132 }
4133
4134 float128 uint128_to_float128(Int128 a, float_status *status)
4135 {
4136     FloatParts128 p = { };
4137     int shift;
4138
4139     if (int128_nz(a)) {
4140         p.cls = float_class_normal;
4141
4142         shift = clz64(int128_gethi(a));
4143         if (shift == 64) {
4144             shift += clz64(int128_getlo(a));
4145         }
4146
4147         p.exp = 127 - shift;
4148         a = int128_lshift(a, shift);
4149
4150         p.frac_hi = int128_gethi(a);
4151         p.frac_lo = int128_getlo(a);
4152     } else {
4153         p.cls = float_class_zero;
4154     }
4155
4156     return float128_round_pack_canonical(&p, status);
4157 }
4158
4159 /*
4160  * Minimum and maximum
4161  */
4162
4163 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4164 {
4165     FloatParts64 pa, pb, *pr;
4166
4167     float16_unpack_canonical(&pa, a, s);
4168     float16_unpack_canonical(&pb, b, s);
4169     pr = parts_minmax(&pa, &pb, s, flags);
4170
4171     return float16_round_pack_canonical(pr, s);
4172 }
4173
4174 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4175                                 float_status *s, int flags)
4176 {
4177     FloatParts64 pa, pb, *pr;
4178
4179     bfloat16_unpack_canonical(&pa, a, s);
4180     bfloat16_unpack_canonical(&pb, b, s);
4181     pr = parts_minmax(&pa, &pb, s, flags);
4182
4183     return bfloat16_round_pack_canonical(pr, s);
4184 }
4185
4186 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4187 {
4188     FloatParts64 pa, pb, *pr;
4189
4190     float32_unpack_canonical(&pa, a, s);
4191     float32_unpack_canonical(&pb, b, s);
4192     pr = parts_minmax(&pa, &pb, s, flags);
4193
4194     return float32_round_pack_canonical(pr, s);
4195 }
4196
4197 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4198 {
4199     FloatParts64 pa, pb, *pr;
4200
4201     float64_unpack_canonical(&pa, a, s);
4202     float64_unpack_canonical(&pb, b, s);
4203     pr = parts_minmax(&pa, &pb, s, flags);
4204
4205     return float64_round_pack_canonical(pr, s);
4206 }
4207
4208 static float128 float128_minmax(float128 a, float128 b,
4209                                 float_status *s, int flags)
4210 {
4211     FloatParts128 pa, pb, *pr;
4212
4213     float128_unpack_canonical(&pa, a, s);
4214     float128_unpack_canonical(&pb, b, s);
4215     pr = parts_minmax(&pa, &pb, s, flags);
4216
4217     return float128_round_pack_canonical(pr, s);
4218 }
4219
4220 #define MINMAX_1(type, name, flags) \
4221     type type##_##name(type a, type b, float_status *s) \
4222     { return type##_minmax(a, b, s, flags); }
4223
4224 #define MINMAX_2(type) \
4225     MINMAX_1(type, max, 0)                                                \
4226     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4227     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4228     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4229     MINMAX_1(type, min, minmax_ismin)                                     \
4230     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4231     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4232     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4233
4234 MINMAX_2(float16)
4235 MINMAX_2(bfloat16)
4236 MINMAX_2(float32)
4237 MINMAX_2(float64)
4238 MINMAX_2(float128)
4239
4240 #undef MINMAX_1
4241 #undef MINMAX_2
4242
4243 /*
4244  * Floating point compare
4245  */
4246
4247 static FloatRelation QEMU_FLATTEN
4248 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4249 {
4250     FloatParts64 pa, pb;
4251
4252     float16_unpack_canonical(&pa, a, s);
4253     float16_unpack_canonical(&pb, b, s);
4254     return parts_compare(&pa, &pb, s, is_quiet);
4255 }
4256
4257 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4258 {
4259     return float16_do_compare(a, b, s, false);
4260 }
4261
4262 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4263 {
4264     return float16_do_compare(a, b, s, true);
4265 }
4266
4267 static FloatRelation QEMU_SOFTFLOAT_ATTR
4268 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4269 {
4270     FloatParts64 pa, pb;
4271
4272     float32_unpack_canonical(&pa, a, s);
4273     float32_unpack_canonical(&pb, b, s);
4274     return parts_compare(&pa, &pb, s, is_quiet);
4275 }
4276
4277 static FloatRelation QEMU_FLATTEN
4278 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4279 {
4280     union_float32 ua, ub;
4281
4282     ua.s = xa;
4283     ub.s = xb;
4284
4285     if (QEMU_NO_HARDFLOAT) {
4286         goto soft;
4287     }
4288
4289     float32_input_flush2(&ua.s, &ub.s, s);
4290     if (isgreaterequal(ua.h, ub.h)) {
4291         if (isgreater(ua.h, ub.h)) {
4292             return float_relation_greater;
4293         }
4294         return float_relation_equal;
4295     }
4296     if (likely(isless(ua.h, ub.h))) {
4297         return float_relation_less;
4298     }
4299     /*
4300      * The only condition remaining is unordered.
4301      * Fall through to set flags.
4302      */
4303  soft:
4304     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4305 }
4306
4307 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4308 {
4309     return float32_hs_compare(a, b, s, false);
4310 }
4311
4312 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4313 {
4314     return float32_hs_compare(a, b, s, true);
4315 }
4316
4317 static FloatRelation QEMU_SOFTFLOAT_ATTR
4318 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4319 {
4320     FloatParts64 pa, pb;
4321
4322     float64_unpack_canonical(&pa, a, s);
4323     float64_unpack_canonical(&pb, b, s);
4324     return parts_compare(&pa, &pb, s, is_quiet);
4325 }
4326
4327 static FloatRelation QEMU_FLATTEN
4328 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4329 {
4330     union_float64 ua, ub;
4331
4332     ua.s = xa;
4333     ub.s = xb;
4334
4335     if (QEMU_NO_HARDFLOAT) {
4336         goto soft;
4337     }
4338
4339     float64_input_flush2(&ua.s, &ub.s, s);
4340     if (isgreaterequal(ua.h, ub.h)) {
4341         if (isgreater(ua.h, ub.h)) {
4342             return float_relation_greater;
4343         }
4344         return float_relation_equal;
4345     }
4346     if (likely(isless(ua.h, ub.h))) {
4347         return float_relation_less;
4348     }
4349     /*
4350      * The only condition remaining is unordered.
4351      * Fall through to set flags.
4352      */
4353  soft:
4354     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4355 }
4356
4357 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4358 {
4359     return float64_hs_compare(a, b, s, false);
4360 }
4361
4362 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4363 {
4364     return float64_hs_compare(a, b, s, true);
4365 }
4366
4367 static FloatRelation QEMU_FLATTEN
4368 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4369 {
4370     FloatParts64 pa, pb;
4371
4372     bfloat16_unpack_canonical(&pa, a, s);
4373     bfloat16_unpack_canonical(&pb, b, s);
4374     return parts_compare(&pa, &pb, s, is_quiet);
4375 }
4376
4377 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4378 {
4379     return bfloat16_do_compare(a, b, s, false);
4380 }
4381
4382 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4383 {
4384     return bfloat16_do_compare(a, b, s, true);
4385 }
4386
4387 static FloatRelation QEMU_FLATTEN
4388 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4389 {
4390     FloatParts128 pa, pb;
4391
4392     float128_unpack_canonical(&pa, a, s);
4393     float128_unpack_canonical(&pb, b, s);
4394     return parts_compare(&pa, &pb, s, is_quiet);
4395 }
4396
4397 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4398 {
4399     return float128_do_compare(a, b, s, false);
4400 }
4401
4402 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4403 {
4404     return float128_do_compare(a, b, s, true);
4405 }
4406
4407 static FloatRelation QEMU_FLATTEN
4408 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4409 {
4410     FloatParts128 pa, pb;
4411
4412     if (!floatx80_unpack_canonical(&pa, a, s) ||
4413         !floatx80_unpack_canonical(&pb, b, s)) {
4414         return float_relation_unordered;
4415     }
4416     return parts_compare(&pa, &pb, s, is_quiet);
4417 }
4418
4419 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4420 {
4421     return floatx80_do_compare(a, b, s, false);
4422 }
4423
4424 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4425 {
4426     return floatx80_do_compare(a, b, s, true);
4427 }
4428
4429 /*
4430  * Scale by 2**N
4431  */
4432
4433 float16 float16_scalbn(float16 a, int n, float_status *status)
4434 {
4435     FloatParts64 p;
4436
4437     float16_unpack_canonical(&p, a, status);
4438     parts_scalbn(&p, n, status);
4439     return float16_round_pack_canonical(&p, status);
4440 }
4441
4442 float32 float32_scalbn(float32 a, int n, float_status *status)
4443 {
4444     FloatParts64 p;
4445
4446     float32_unpack_canonical(&p, a, status);
4447     parts_scalbn(&p, n, status);
4448     return float32_round_pack_canonical(&p, status);
4449 }
4450
4451 float64 float64_scalbn(float64 a, int n, float_status *status)
4452 {
4453     FloatParts64 p;
4454
4455     float64_unpack_canonical(&p, a, status);
4456     parts_scalbn(&p, n, status);
4457     return float64_round_pack_canonical(&p, status);
4458 }
4459
4460 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4461 {
4462     FloatParts64 p;
4463
4464     bfloat16_unpack_canonical(&p, a, status);
4465     parts_scalbn(&p, n, status);
4466     return bfloat16_round_pack_canonical(&p, status);
4467 }
4468
4469 float128 float128_scalbn(float128 a, int n, float_status *status)
4470 {
4471     FloatParts128 p;
4472
4473     float128_unpack_canonical(&p, a, status);
4474     parts_scalbn(&p, n, status);
4475     return float128_round_pack_canonical(&p, status);
4476 }
4477
4478 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4479 {
4480     FloatParts128 p;
4481
4482     if (!floatx80_unpack_canonical(&p, a, status)) {
4483         return floatx80_default_nan(status);
4484     }
4485     parts_scalbn(&p, n, status);
4486     return floatx80_round_pack_canonical(&p, status);
4487 }
4488
4489 /*
4490  * Square Root
4491  */
4492
4493 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4494 {
4495     FloatParts64 p;
4496
4497     float16_unpack_canonical(&p, a, status);
4498     parts_sqrt(&p, status, &float16_params);
4499     return float16_round_pack_canonical(&p, status);
4500 }
4501
4502 static float32 QEMU_SOFTFLOAT_ATTR
4503 soft_f32_sqrt(float32 a, float_status *status)
4504 {
4505     FloatParts64 p;
4506
4507     float32_unpack_canonical(&p, a, status);
4508     parts_sqrt(&p, status, &float32_params);
4509     return float32_round_pack_canonical(&p, status);
4510 }
4511
4512 static float64 QEMU_SOFTFLOAT_ATTR
4513 soft_f64_sqrt(float64 a, float_status *status)
4514 {
4515     FloatParts64 p;
4516
4517     float64_unpack_canonical(&p, a, status);
4518     parts_sqrt(&p, status, &float64_params);
4519     return float64_round_pack_canonical(&p, status);
4520 }
4521
4522 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4523 {
4524     union_float32 ua, ur;
4525
4526     ua.s = xa;
4527     if (unlikely(!can_use_fpu(s))) {
4528         goto soft;
4529     }
4530
4531     float32_input_flush1(&ua.s, s);
4532     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4533         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4534                        fpclassify(ua.h) == FP_ZERO) ||
4535                      signbit(ua.h))) {
4536             goto soft;
4537         }
4538     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4539                         float32_is_neg(ua.s))) {
4540         goto soft;
4541     }
4542     ur.h = sqrtf(ua.h);
4543     return ur.s;
4544
4545  soft:
4546     return soft_f32_sqrt(ua.s, s);
4547 }
4548
4549 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4550 {
4551     union_float64 ua, ur;
4552
4553     ua.s = xa;
4554     if (unlikely(!can_use_fpu(s))) {
4555         goto soft;
4556     }
4557
4558     float64_input_flush1(&ua.s, s);
4559     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4560         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4561                        fpclassify(ua.h) == FP_ZERO) ||
4562                      signbit(ua.h))) {
4563             goto soft;
4564         }
4565     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4566                         float64_is_neg(ua.s))) {
4567         goto soft;
4568     }
4569     ur.h = sqrt(ua.h);
4570     return ur.s;
4571
4572  soft:
4573     return soft_f64_sqrt(ua.s, s);
4574 }
4575
4576 float64 float64r32_sqrt(float64 a, float_status *status)
4577 {
4578     FloatParts64 p;
4579
4580     float64_unpack_canonical(&p, a, status);
4581     parts_sqrt(&p, status, &float64_params);
4582     return float64r32_round_pack_canonical(&p, status);
4583 }
4584
4585 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4586 {
4587     FloatParts64 p;
4588
4589     bfloat16_unpack_canonical(&p, a, status);
4590     parts_sqrt(&p, status, &bfloat16_params);
4591     return bfloat16_round_pack_canonical(&p, status);
4592 }
4593
4594 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4595 {
4596     FloatParts128 p;
4597
4598     float128_unpack_canonical(&p, a, status);
4599     parts_sqrt(&p, status, &float128_params);
4600     return float128_round_pack_canonical(&p, status);
4601 }
4602
4603 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4604 {
4605     FloatParts128 p;
4606
4607     if (!floatx80_unpack_canonical(&p, a, s)) {
4608         return floatx80_default_nan(s);
4609     }
4610     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4611     return floatx80_round_pack_canonical(&p, s);
4612 }
4613
4614 /*
4615  * log2
4616  */
4617 float32 float32_log2(float32 a, float_status *status)
4618 {
4619     FloatParts64 p;
4620
4621     float32_unpack_canonical(&p, a, status);
4622     parts_log2(&p, status, &float32_params);
4623     return float32_round_pack_canonical(&p, status);
4624 }
4625
4626 float64 float64_log2(float64 a, float_status *status)
4627 {
4628     FloatParts64 p;
4629
4630     float64_unpack_canonical(&p, a, status);
4631     parts_log2(&p, status, &float64_params);
4632     return float64_round_pack_canonical(&p, status);
4633 }
4634
4635 /*----------------------------------------------------------------------------
4636 | The pattern for a default generated NaN.
4637 *----------------------------------------------------------------------------*/
4638
4639 float16 float16_default_nan(float_status *status)
4640 {
4641     FloatParts64 p;
4642
4643     parts_default_nan(&p, status);
4644     p.frac >>= float16_params.frac_shift;
4645     return float16_pack_raw(&p);
4646 }
4647
4648 float32 float32_default_nan(float_status *status)
4649 {
4650     FloatParts64 p;
4651
4652     parts_default_nan(&p, status);
4653     p.frac >>= float32_params.frac_shift;
4654     return float32_pack_raw(&p);
4655 }
4656
4657 float64 float64_default_nan(float_status *status)
4658 {
4659     FloatParts64 p;
4660
4661     parts_default_nan(&p, status);
4662     p.frac >>= float64_params.frac_shift;
4663     return float64_pack_raw(&p);
4664 }
4665
4666 float128 float128_default_nan(float_status *status)
4667 {
4668     FloatParts128 p;
4669
4670     parts_default_nan(&p, status);
4671     frac_shr(&p, float128_params.frac_shift);
4672     return float128_pack_raw(&p);
4673 }
4674
4675 bfloat16 bfloat16_default_nan(float_status *status)
4676 {
4677     FloatParts64 p;
4678
4679     parts_default_nan(&p, status);
4680     p.frac >>= bfloat16_params.frac_shift;
4681     return bfloat16_pack_raw(&p);
4682 }
4683
4684 /*----------------------------------------------------------------------------
4685 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4686 *----------------------------------------------------------------------------*/
4687
4688 float16 float16_silence_nan(float16 a, float_status *status)
4689 {
4690     FloatParts64 p;
4691
4692     float16_unpack_raw(&p, a);
4693     p.frac <<= float16_params.frac_shift;
4694     parts_silence_nan(&p, status);
4695     p.frac >>= float16_params.frac_shift;
4696     return float16_pack_raw(&p);
4697 }
4698
4699 float32 float32_silence_nan(float32 a, float_status *status)
4700 {
4701     FloatParts64 p;
4702
4703     float32_unpack_raw(&p, a);
4704     p.frac <<= float32_params.frac_shift;
4705     parts_silence_nan(&p, status);
4706     p.frac >>= float32_params.frac_shift;
4707     return float32_pack_raw(&p);
4708 }
4709
4710 float64 float64_silence_nan(float64 a, float_status *status)
4711 {
4712     FloatParts64 p;
4713
4714     float64_unpack_raw(&p, a);
4715     p.frac <<= float64_params.frac_shift;
4716     parts_silence_nan(&p, status);
4717     p.frac >>= float64_params.frac_shift;
4718     return float64_pack_raw(&p);
4719 }
4720
4721 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4722 {
4723     FloatParts64 p;
4724
4725     bfloat16_unpack_raw(&p, a);
4726     p.frac <<= bfloat16_params.frac_shift;
4727     parts_silence_nan(&p, status);
4728     p.frac >>= bfloat16_params.frac_shift;
4729     return bfloat16_pack_raw(&p);
4730 }
4731
4732 float128 float128_silence_nan(float128 a, float_status *status)
4733 {
4734     FloatParts128 p;
4735
4736     float128_unpack_raw(&p, a);
4737     frac_shl(&p, float128_params.frac_shift);
4738     parts_silence_nan(&p, status);
4739     frac_shr(&p, float128_params.frac_shift);
4740     return float128_pack_raw(&p);
4741 }
4742
4743 /*----------------------------------------------------------------------------
4744 | If `a' is denormal and we are in flush-to-zero mode then set the
4745 | input-denormal exception and return zero. Otherwise just return the value.
4746 *----------------------------------------------------------------------------*/
4747
4748 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4749 {
4750     if (p.exp == 0 && p.frac != 0) {
4751         float_raise(float_flag_input_denormal, status);
4752         return true;
4753     }
4754
4755     return false;
4756 }
4757
4758 float16 float16_squash_input_denormal(float16 a, float_status *status)
4759 {
4760     if (status->flush_inputs_to_zero) {
4761         FloatParts64 p;
4762
4763         float16_unpack_raw(&p, a);
4764         if (parts_squash_denormal(p, status)) {
4765             return float16_set_sign(float16_zero, p.sign);
4766         }
4767     }
4768     return a;
4769 }
4770
4771 float32 float32_squash_input_denormal(float32 a, float_status *status)
4772 {
4773     if (status->flush_inputs_to_zero) {
4774         FloatParts64 p;
4775
4776         float32_unpack_raw(&p, a);
4777         if (parts_squash_denormal(p, status)) {
4778             return float32_set_sign(float32_zero, p.sign);
4779         }
4780     }
4781     return a;
4782 }
4783
4784 float64 float64_squash_input_denormal(float64 a, float_status *status)
4785 {
4786     if (status->flush_inputs_to_zero) {
4787         FloatParts64 p;
4788
4789         float64_unpack_raw(&p, a);
4790         if (parts_squash_denormal(p, status)) {
4791             return float64_set_sign(float64_zero, p.sign);
4792         }
4793     }
4794     return a;
4795 }
4796
4797 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4798 {
4799     if (status->flush_inputs_to_zero) {
4800         FloatParts64 p;
4801
4802         bfloat16_unpack_raw(&p, a);
4803         if (parts_squash_denormal(p, status)) {
4804             return bfloat16_set_sign(bfloat16_zero, p.sign);
4805         }
4806     }
4807     return a;
4808 }
4809
4810 /*----------------------------------------------------------------------------
4811 | Normalizes the subnormal extended double-precision floating-point value
4812 | represented by the denormalized significand `aSig'.  The normalized exponent
4813 | and significand are stored at the locations pointed to by `zExpPtr' and
4814 | `zSigPtr', respectively.
4815 *----------------------------------------------------------------------------*/
4816
4817 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4818                                 uint64_t *zSigPtr)
4819 {
4820     int8_t shiftCount;
4821
4822     shiftCount = clz64(aSig);
4823     *zSigPtr = aSig<<shiftCount;
4824     *zExpPtr = 1 - shiftCount;
4825 }
4826
4827 /*----------------------------------------------------------------------------
4828 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4829 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4830 | and returns the proper extended double-precision floating-point value
4831 | corresponding to the abstract input.  Ordinarily, the abstract value is
4832 | rounded and packed into the extended double-precision format, with the
4833 | inexact exception raised if the abstract input cannot be represented
4834 | exactly.  However, if the abstract value is too large, the overflow and
4835 | inexact exceptions are raised and an infinity or maximal finite value is
4836 | returned.  If the abstract value is too small, the input value is rounded to
4837 | a subnormal number, and the underflow and inexact exceptions are raised if
4838 | the abstract input cannot be represented exactly as a subnormal extended
4839 | double-precision floating-point number.
4840 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4841 | the result is rounded to the same number of bits as single or double
4842 | precision, respectively.  Otherwise, the result is rounded to the full
4843 | precision of the extended double-precision format.
4844 |     The input significand must be normalized or smaller.  If the input
4845 | significand is not normalized, `zExp' must be 0; in that case, the result
4846 | returned is a subnormal number, and it must not require rounding.  The
4847 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4848 | Floating-Point Arithmetic.
4849 *----------------------------------------------------------------------------*/
4850
4851 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4852                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4853                               float_status *status)
4854 {
4855     FloatRoundMode roundingMode;
4856     bool roundNearestEven, increment, isTiny;
4857     int64_t roundIncrement, roundMask, roundBits;
4858
4859     roundingMode = status->float_rounding_mode;
4860     roundNearestEven = ( roundingMode == float_round_nearest_even );
4861     switch (roundingPrecision) {
4862     case floatx80_precision_x:
4863         goto precision80;
4864     case floatx80_precision_d:
4865         roundIncrement = UINT64_C(0x0000000000000400);
4866         roundMask = UINT64_C(0x00000000000007FF);
4867         break;
4868     case floatx80_precision_s:
4869         roundIncrement = UINT64_C(0x0000008000000000);
4870         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4871         break;
4872     default:
4873         g_assert_not_reached();
4874     }
4875     zSig0 |= ( zSig1 != 0 );
4876     switch (roundingMode) {
4877     case float_round_nearest_even:
4878     case float_round_ties_away:
4879         break;
4880     case float_round_to_zero:
4881         roundIncrement = 0;
4882         break;
4883     case float_round_up:
4884         roundIncrement = zSign ? 0 : roundMask;
4885         break;
4886     case float_round_down:
4887         roundIncrement = zSign ? roundMask : 0;
4888         break;
4889     default:
4890         abort();
4891     }
4892     roundBits = zSig0 & roundMask;
4893     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4894         if (    ( 0x7FFE < zExp )
4895              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4896            ) {
4897             goto overflow;
4898         }
4899         if ( zExp <= 0 ) {
4900             if (status->flush_to_zero) {
4901                 float_raise(float_flag_output_denormal, status);
4902                 return packFloatx80(zSign, 0, 0);
4903             }
4904             isTiny = status->tininess_before_rounding
4905                   || (zExp < 0 )
4906                   || (zSig0 <= zSig0 + roundIncrement);
4907             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4908             zExp = 0;
4909             roundBits = zSig0 & roundMask;
4910             if (isTiny && roundBits) {
4911                 float_raise(float_flag_underflow, status);
4912             }
4913             if (roundBits) {
4914                 float_raise(float_flag_inexact, status);
4915             }
4916             zSig0 += roundIncrement;
4917             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4918             roundIncrement = roundMask + 1;
4919             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4920                 roundMask |= roundIncrement;
4921             }
4922             zSig0 &= ~ roundMask;
4923             return packFloatx80( zSign, zExp, zSig0 );
4924         }
4925     }
4926     if (roundBits) {
4927         float_raise(float_flag_inexact, status);
4928     }
4929     zSig0 += roundIncrement;
4930     if ( zSig0 < roundIncrement ) {
4931         ++zExp;
4932         zSig0 = UINT64_C(0x8000000000000000);
4933     }
4934     roundIncrement = roundMask + 1;
4935     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4936         roundMask |= roundIncrement;
4937     }
4938     zSig0 &= ~ roundMask;
4939     if ( zSig0 == 0 ) zExp = 0;
4940     return packFloatx80( zSign, zExp, zSig0 );
4941  precision80:
4942     switch (roundingMode) {
4943     case float_round_nearest_even:
4944     case float_round_ties_away:
4945         increment = ((int64_t)zSig1 < 0);
4946         break;
4947     case float_round_to_zero:
4948         increment = 0;
4949         break;
4950     case float_round_up:
4951         increment = !zSign && zSig1;
4952         break;
4953     case float_round_down:
4954         increment = zSign && zSig1;
4955         break;
4956     default:
4957         abort();
4958     }
4959     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4960         if (    ( 0x7FFE < zExp )
4961              || (    ( zExp == 0x7FFE )
4962                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4963                   && increment
4964                 )
4965            ) {
4966             roundMask = 0;
4967  overflow:
4968             float_raise(float_flag_overflow | float_flag_inexact, status);
4969             if (    ( roundingMode == float_round_to_zero )
4970                  || ( zSign && ( roundingMode == float_round_up ) )
4971                  || ( ! zSign && ( roundingMode == float_round_down ) )
4972                ) {
4973                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4974             }
4975             return packFloatx80(zSign,
4976                                 floatx80_infinity_high,
4977                                 floatx80_infinity_low);
4978         }
4979         if ( zExp <= 0 ) {
4980             isTiny = status->tininess_before_rounding
4981                   || (zExp < 0)
4982                   || !increment
4983                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4984             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4985             zExp = 0;
4986             if (isTiny && zSig1) {
4987                 float_raise(float_flag_underflow, status);
4988             }
4989             if (zSig1) {
4990                 float_raise(float_flag_inexact, status);
4991             }
4992             switch (roundingMode) {
4993             case float_round_nearest_even:
4994             case float_round_ties_away:
4995                 increment = ((int64_t)zSig1 < 0);
4996                 break;
4997             case float_round_to_zero:
4998                 increment = 0;
4999                 break;
5000             case float_round_up:
5001                 increment = !zSign && zSig1;
5002                 break;
5003             case float_round_down:
5004                 increment = zSign && zSig1;
5005                 break;
5006             default:
5007                 abort();
5008             }
5009             if ( increment ) {
5010                 ++zSig0;
5011                 if (!(zSig1 << 1) && roundNearestEven) {
5012                     zSig0 &= ~1;
5013                 }
5014                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5015             }
5016             return packFloatx80( zSign, zExp, zSig0 );
5017         }
5018     }
5019     if (zSig1) {
5020         float_raise(float_flag_inexact, status);
5021     }
5022     if ( increment ) {
5023         ++zSig0;
5024         if ( zSig0 == 0 ) {
5025             ++zExp;
5026             zSig0 = UINT64_C(0x8000000000000000);
5027         }
5028         else {
5029             if (!(zSig1 << 1) && roundNearestEven) {
5030                 zSig0 &= ~1;
5031             }
5032         }
5033     }
5034     else {
5035         if ( zSig0 == 0 ) zExp = 0;
5036     }
5037     return packFloatx80( zSign, zExp, zSig0 );
5038
5039 }
5040
5041 /*----------------------------------------------------------------------------
5042 | Takes an abstract floating-point value having sign `zSign', exponent
5043 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5044 | and returns the proper extended double-precision floating-point value
5045 | corresponding to the abstract input.  This routine is just like
5046 | `roundAndPackFloatx80' except that the input significand does not have to be
5047 | normalized.
5048 *----------------------------------------------------------------------------*/
5049
5050 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5051                                        bool zSign, int32_t zExp,
5052                                        uint64_t zSig0, uint64_t zSig1,
5053                                        float_status *status)
5054 {
5055     int8_t shiftCount;
5056
5057     if ( zSig0 == 0 ) {
5058         zSig0 = zSig1;
5059         zSig1 = 0;
5060         zExp -= 64;
5061     }
5062     shiftCount = clz64(zSig0);
5063     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5064     zExp -= shiftCount;
5065     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5066                                 zSig0, zSig1, status);
5067
5068 }
5069
5070 /*----------------------------------------------------------------------------
5071 | Returns the binary exponential of the single-precision floating-point value
5072 | `a'. The operation is performed according to the IEC/IEEE Standard for
5073 | Binary Floating-Point Arithmetic.
5074 |
5075 | Uses the following identities:
5076 |
5077 | 1. -------------------------------------------------------------------------
5078 |      x    x*ln(2)
5079 |     2  = e
5080 |
5081 | 2. -------------------------------------------------------------------------
5082 |                      2     3     4     5           n
5083 |      x        x     x     x     x     x           x
5084 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5085 |               1!    2!    3!    4!    5!          n!
5086 *----------------------------------------------------------------------------*/
5087
5088 static const float64 float32_exp2_coefficients[15] =
5089 {
5090     const_float64( 0x3ff0000000000000ll ), /*  1 */
5091     const_float64( 0x3fe0000000000000ll ), /*  2 */
5092     const_float64( 0x3fc5555555555555ll ), /*  3 */
5093     const_float64( 0x3fa5555555555555ll ), /*  4 */
5094     const_float64( 0x3f81111111111111ll ), /*  5 */
5095     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5096     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5097     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5098     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5099     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5100     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5101     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5102     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5103     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5104     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5105 };
5106
5107 float32 float32_exp2(float32 a, float_status *status)
5108 {
5109     FloatParts64 xp, xnp, tp, rp;
5110     int i;
5111
5112     float32_unpack_canonical(&xp, a, status);
5113     if (unlikely(xp.cls != float_class_normal)) {
5114         switch (xp.cls) {
5115         case float_class_snan:
5116         case float_class_qnan:
5117             parts_return_nan(&xp, status);
5118             return float32_round_pack_canonical(&xp, status);
5119         case float_class_inf:
5120             return xp.sign ? float32_zero : a;
5121         case float_class_zero:
5122             return float32_one;
5123         default:
5124             break;
5125         }
5126         g_assert_not_reached();
5127     }
5128
5129     float_raise(float_flag_inexact, status);
5130
5131     float64_unpack_canonical(&tp, float64_ln2, status);
5132     xp = *parts_mul(&xp, &tp, status);
5133     xnp = xp;
5134
5135     float64_unpack_canonical(&rp, float64_one, status);
5136     for (i = 0 ; i < 15 ; i++) {
5137         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5138         rp = *parts_muladd(&tp, &xp, &rp, 0, status);
5139         xnp = *parts_mul(&xnp, &xp, status);
5140     }
5141
5142     return float32_round_pack_canonical(&rp, status);
5143 }
5144
5145 /*----------------------------------------------------------------------------
5146 | Rounds the extended double-precision floating-point value `a'
5147 | to the precision provided by floatx80_rounding_precision and returns the
5148 | result as an extended double-precision floating-point value.
5149 | The operation is performed according to the IEC/IEEE Standard for Binary
5150 | Floating-Point Arithmetic.
5151 *----------------------------------------------------------------------------*/
5152
5153 floatx80 floatx80_round(floatx80 a, float_status *status)
5154 {
5155     FloatParts128 p;
5156
5157     if (!floatx80_unpack_canonical(&p, a, status)) {
5158         return floatx80_default_nan(status);
5159     }
5160     return floatx80_round_pack_canonical(&p, status);
5161 }
5162
5163 static void __attribute__((constructor)) softfloat_init(void)
5164 {
5165     union_float64 ua, ub, uc, ur;
5166
5167     if (QEMU_NO_HARDFLOAT) {
5168         return;
5169     }
5170     /*
5171      * Test that the host's FMA is not obviously broken. For example,
5172      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5173      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5174      */
5175     ua.s = 0x0020000000000001ULL;
5176     ub.s = 0x3ca0000000000000ULL;
5177     uc.s = 0x0020000000000000ULL;
5178     ur.h = fma(ua.h, ub.h, uc.h);
5179     if (ur.s != 0x0020000000000001ULL) {
5180         force_soft_fma = true;
5181     }
5182 }