fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
/* softfloat (and in particular the code in softfloat-specialize.c.inc) is
 * target-dependent and needs the TARGET_* macros.
 */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 *
 * Naming scheme: QEMU_HARDFLOAT_<inputs>F<bits>_USE_FP.  A value of 1 selects
 * the fpclassify()-based checks; 0 selects the float32/64_* primitives.
 */
#if defined(__x86_64__)
/* x86_64 hosts: only the 64-bit variants use fpclassify(). */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* All other hosts: always use the float32/64_* primitives. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
 205
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 *
 * Consequently isinf() is enabled only on x86_64 and aarch64 hosts.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
 217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 *
 * QEMU_NO_HARDFLOAT is 1 when hardfloat is disabled (TARGET_PPC, or a build
 * with -ffast-math) and 0 otherwise.  QEMU_SOFTFLOAT_ATTR is QEMU_FLATTEN in
 * both cases, with noinline added when hardfloat is enabled.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
/*
 * Overlay of the softfloat representation (s) and the host floating-point
 * type (h), so that the same value can be handed to either the soft or the
 * hard implementation.
 */
typedef union {
    float32 s;
    float h;
} union_float32;

/* As union_float32, for float64 / double. */
typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicates over the two operands of a 2-input operation. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Soft (takes the float_status) and hard (host type only) 2-input operations. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
/*
 * Classify a floating point number. Everything from float_class_qnan
 * onwards is a NaN, so cls >= float_class_qnan is any NaN.
 *
 * The enumerator order is significant: is_nan() relies on the NaN classes
 * coming last, and the float_cmask_* constants use the enumerator values as
 * bit positions.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
 417
/* Single-bit mask for a FloatClass value, so that classes can be OR-ed. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Combined masks: zero-or-infinity, and either kind of NaN. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
 430
/* Flags for parts_minmax.  Each flag is a distinct bit. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
 445
 446 /* Simple helpers for checking if, or what kind of, NaN we have */
 447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 448 {
 449     return unlikely(c >= float_class_qnan);
 450 }
 451
 452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 453 {
 454     return c == float_class_snan;
 455 }
 456
 457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 458 {
 459     return c == float_class_qnan;
 460 }
 461
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 *
 * Note that the *_unpack_raw helpers fill these structures without
 * canonicalizing: cls is float_class_unclassified and exp/frac hold the raw
 * field values at that point.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;
 487
/*
 * Decomposed float with a two-word (128-bit) fraction; frac_hi is the most
 * significant word.
 */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;
 495
/*
 * Decomposed float with a four-word (256-bit) fraction, ordered from the
 * most significant word (frac_hi) to the least significant (frac_lo).
 */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
 505
/*
 * These apply to the most significant word of each FloatPartsN:
 * the binary point sits at bit 63, and DECOMPOSED_IMPLICIT_BIT is that bit.
 */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 509
 510 /* Structure holding all of the relevant parameters for a format.
 511  *   exp_size: the size of the exponent field
 512  *   exp_bias: the offset applied to the exponent field
 513  *   exp_max: the maximum normalised exponent
 514  *   frac_size: the size of the fraction field
 515  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 516  * The following are computed based the size of fraction
 517  *   round_mask: bits below lsb which must be rounded
 518  * The following optional modifiers are available:
 519  *   arm_althp: handle ARM Alternative Half Precision
 520  *   m68k_denormal: explicit integer bit for extended precision may be 1
 521  */
 522 typedef struct {
 523     int exp_size;
 524     int exp_bias;
 525     int exp_re_bias;
 526     int exp_max;
 527     int frac_size;
 528     int frac_shift;
 529     bool arm_althp;
 530     bool m68k_denormal;
 531     uint64_t round_mask;
 532 } FloatFmt;
 533
 534 /* Expand fields based on the size of exponent and fraction */
 535 #define FLOAT_PARAMS_(E)                                \
 536     .exp_size       = E,                                \
 537     .exp_bias       = ((1 << E) - 1) >> 1,              \
 538     .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
 539     .exp_max        = (1 << E) - 1
 540
 541 #define FLOAT_PARAMS(E, F)                              \
 542     FLOAT_PARAMS_(E),                                   \
 543     .frac_size      = F,                                \
 544     .frac_shift     = (-F - 1) & 63,                    \
 545     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
 546
/* float16: 5-bit exponent, 10-bit fraction. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same layout as float16_params, with the arm_althp modifier set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: 8-bit exponent, 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* float32: 8-bit exponent, 23-bit fraction. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* float64: 11-bit exponent, 52-bit fraction. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* float128: 15-bit exponent, 112-bit fraction. */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
 571
 572 #define FLOATX80_PARAMS(R)              \
 573     FLOAT_PARAMS_(15),                  \
 574     .frac_size = R == 64 ? 63 : R,      \
 575     .frac_shift = 0,                    \
 576     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 577
 578 static const FloatFmt floatx80_params[3] = {
 579     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
 580     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
 581     [floatx80_precision_x] = {
 582         FLOATX80_PARAMS(64),
 583 #ifdef TARGET_M68K
 584         .m68k_denormal = true,
 585 #endif
 586     },
 587 };
 588
 589 /* Unpack a float to parts, but do not canonicalize.  */
 590 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 591 {
 592     const int f_size = fmt->frac_size;
 593     const int e_size = fmt->exp_size;
 594
 595     *r = (FloatParts64) {
 596         .cls = float_class_unclassified,
 597         .sign = extract64(raw, f_size + e_size, 1),
 598         .exp = extract64(raw, f_size, e_size),
 599         .frac = extract64(raw, 0, f_size)
 600     };
 601 }
 602
 603 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
 604 {
 605     unpack_raw64(p, &float16_params, f);
 606 }
 607
 608 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 609 {
 610     unpack_raw64(p, &bfloat16_params, f);
 611 }
 612
 613 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
 614 {
 615     unpack_raw64(p, &float32_params, f);
 616 }
 617
 618 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
 619 {
 620     unpack_raw64(p, &float64_params, f);
 621 }
 622
 623 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 624 {
 625     *p = (FloatParts128) {
 626         .cls = float_class_unclassified,
 627         .sign = extract32(f.high, 15, 1),
 628         .exp = extract32(f.high, 0, 15),
 629         .frac_hi = f.low
 630     };
 631 }
 632
 633 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
 634 {
 635     const int f_size = float128_params.frac_size - 64;
 636     const int e_size = float128_params.exp_size;
 637
 638     *p = (FloatParts128) {
 639         .cls = float_class_unclassified,
 640         .sign = extract64(f.high, f_size + e_size, 1),
 641         .exp = extract64(f.high, f_size, e_size),
 642         .frac_hi = extract64(f.high, 0, f_size),
 643         .frac_lo = f.low,
 644     };
 645 }
 646
 647 /* Pack a float from parts, but do not canonicalize.  */
 648 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 649 {
 650     const int f_size = fmt->frac_size;
 651     const int e_size = fmt->exp_size;
 652     uint64_t ret;
 653
 654     ret = (uint64_t)p->sign << (f_size + e_size);
 655     ret = deposit64(ret, f_size, e_size, p->exp);
 656     ret = deposit64(ret, 0, f_size, p->frac);
 657     return ret;
 658 }
 659
 660 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
 661 {
 662     return make_float16(pack_raw64(p, &float16_params));
 663 }
 664
 665 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
 666 {
 667     return pack_raw64(p, &bfloat16_params);
 668 }
 669
 670 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
 671 {
 672     return make_float32(pack_raw64(p, &float32_params));
 673 }
 674
 675 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
 676 {
 677     return make_float64(pack_raw64(p, &float64_params));
 678 }
 679
 680 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
 681 {
 682     const int f_size = float128_params.frac_size - 64;
 683     const int e_size = float128_params.exp_size;
 684     uint64_t hi;
 685
 686     hi = (uint64_t)p->sign << (f_size + e_size);
 687     hi = deposit64(hi, f_size, e_size, p->exp);
 688     hi = deposit64(hi, 0, f_size, p->frac_hi);
 689     return make_float128(hi, p->frac_lo);
 690 }
 691
 692 /*----------------------------------------------------------------------------
 693 | Functions and definitions to determine:  (1) whether tininess for underflow
 694 | is detected before or after rounding by default, (2) what (if anything)
 695 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 696 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 697 | are propagated from function inputs to output.  These details are target-
 698 | specific.
 699 *----------------------------------------------------------------------------*/
 700 #include "softfloat-specialize.c.inc"
 701
/*
 * Select the size-specific implementation of NAME from the static type of
 * the pointer P: parts64_NAME for FloatParts64 *, parts128_NAME for
 * FloatParts128 *.  The result is a function designator; callers append the
 * argument list.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

/* As above, additionally accepting FloatParts256 * (parts256_NAME). */
#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)

/*
 * NOTE(review): no prototypes for parts{64,128}_default_nan/silence_nan
 * appear here; presumably they come from softfloat-specialize.c.inc -- confirm.
 */
#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 713
/*
 * Forward declarations of the NaN-handling helpers, each paired with a
 * type-generic wrapper macro that dispatches on the FloatPartsN pointer type
 * of its first argument.
 */
static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

/* Two-operand NaN selection; returns a pointer to a FloatPartsN. */
static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

/* Three-operand (muladd) NaN selection, given class masks for a|b and a|b|c. */
static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 737
/*
 * Forward declarations for converting between raw and canonical parts.
 * Each routine works in place on @p and takes the format description @fmt;
 * the wrapper macros dispatch on the FloatPartsN pointer type.
 */
static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 761
/*
 * Forward declarations for addition and subtraction.  The *_normal helpers
 * exist for 64-, 128- and 256-bit fractions; addsub only for 64 and 128.
 */
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

/* NOTE(review): the meaning of the bool result is not visible here -- confirm. */
static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

/* Combined add/subtract entry point; @subtract selects the operation. */
static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 783
/*
 * Forward declarations of the 64- and 128-bit mul routines; each returns
 * a pointer to a parts structure of the same width.  parts_mul()
 * dispatches on its first argument through PARTS_GENERIC_64_128.
 */
static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)
 791
/*
 * Forward declarations of the 64- and 128-bit muladd routines.
 * parts_muladd() dispatches on its first argument through
 * PARTS_GENERIC_64_128.  Note the macro takes the flags (Z) before the
 * status pointer (S), matching the prototypes.
 */
static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 801
/*
 * Forward declarations of the 64- and 128-bit div routines; each returns
 * a pointer to a parts structure of the same width.  parts_div()
 * dispatches on its first argument through PARTS_GENERIC_64_128.
 */
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)
 809
/*
 * Forward declarations of the 64- and 128-bit modrem routines.
 * parts_modrem() dispatches on its first argument through
 * PARTS_GENERIC_64_128; Q is forwarded as the mod_quot pointer.
 */
static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
 817
/*
 * Forward declarations of the 64- and 128-bit sqrt routines, which work
 * in place on their first argument.  parts_sqrt() dispatches on that
 * argument through PARTS_GENERIC_64_128.
 */
static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 823
/*
 * Forward declarations of the 64- and 128-bit round_to_int_normal
 * routines.  parts_round_to_int_normal() dispatches on its first argument
 * through PARTS_GENERIC_64_128; C is the scale and F the frac_size.
 */
static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 831
/*
 * Forward declarations of the 64- and 128-bit round_to_int routines.
 * parts_round_to_int() dispatches on its first argument through
 * PARTS_GENERIC_64_128; C is the scale and F the format description.
 */
static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 841
/*
 * Forward declarations of the 64- and 128-bit float_to_sint routines,
 * which return an int64_t and take explicit min/max bounds.
 * parts_float_to_sint() dispatches on its first argument through
 * PARTS_GENERIC_64_128.
 */
static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 851
/*
 * Forward declarations of the 64- and 128-bit float_to_uint routines,
 * which return a uint64_t and take an explicit max bound.
 * parts_float_to_uint() dispatches on its first argument through
 * PARTS_GENERIC_64_128.
 */
static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 861
/*
 * Forward declarations of the 64- and 128-bit float_to_sint_modulo
 * routines.  parts_float_to_sint_modulo() dispatches on its first
 * argument through PARTS_GENERIC_64_128; M is forwarded as 'bitsm1'.
 */
static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s);
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
                                             FloatRoundMode rmode,
                                             int bitsm1, float_status *s);

#define parts_float_to_sint_modulo(P, R, M, S) \
    PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
 871
 872 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 873                                   int scale, float_status *s);
 874 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 875                                    int scale, float_status *s);
 876
 877 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 878     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 879
 880 #define parts_sint_to_float(P, I, Z, S) \
 881     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 882
/*
 * Forward declarations of the 64- and 128-bit uint_to_float routines,
 * which fill *p from the unsigned integer 'a'.  parts_uint_to_float()
 * dispatches on its first argument through PARTS_GENERIC_64_128.
 */
static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 890
/*
 * Forward declarations of the 64- and 128-bit minmax routines; each
 * returns a pointer to a parts structure of the same width.
 * parts_minmax() dispatches on its first argument through
 * PARTS_GENERIC_64_128; F is forwarded as the 'flags' argument.
 */
static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 898
/*
 * Forward declarations of the 64- and 128-bit compare routines, which
 * return a FloatRelation.  parts_compare() dispatches on its first
 * argument through PARTS_GENERIC_64_128; Q is forwarded as the bool 'q'.
 */
static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
                                     float_status *s, bool q);
static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 906
/*
 * Forward declarations of the 64- and 128-bit scalbn routines, which
 * work in place on their first argument.  parts_scalbn() dispatches on
 * that argument through PARTS_GENERIC_64_128.
 */
static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 912
/*
 * Forward declarations of the 64- and 128-bit log2 routines, which work
 * in place on their first argument.  parts_log2() dispatches on that
 * argument through PARTS_GENERIC_64_128.
 */
static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)
 918
 919 /*
 920  * Helper functions for softfloat-parts.c.inc, per-size operations.
 921  */
 922
/*
 * Select frac64_NAME or frac128_NAME according to whether P has type
 * FloatParts64 * or FloatParts128 *.  Expands to the function designator
 * only; the caller appends the argument list.
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)
 926
/*
 * As FRAC_GENERIC_64_128, with an additional FloatParts256 * association
 * that selects frac256_NAME.
 */
#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)
 931
/*
 * r->frac = a->frac + b->frac.
 * Returns the flag reported by uadd64_overflow() for that addition
 * (the carry out of the 64-bit sum).
 */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}
 936
 937 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 938 {
 939     bool c = 0;
 940     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 941     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 942     return c;
 943 }
 944
 945 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 946 {
 947     bool c = 0;
 948     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 949     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 950     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 951     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 952     return c;
 953 }
 954
/* Type-generic add: picks frac{64,128,256}_add from the type of R. */
#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 956
/*
 * r->frac = a->frac + c, for an immediate 64-bit addend c.
 * Returns the flag reported by uadd64_overflow() for that addition.
 */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}
 961
 962 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 963 {
 964     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 965     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 966 }
 967
/* Type-generic add-immediate: picks frac{64,128}_addi from the type of R. */
#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 969
/* Set every bit of the 64-bit fraction. */
static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;   /* -1 converted to the unsigned field is all ones */
}
 974
/* Set every bit of both words of the 128-bit fraction. */
static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}
 979
/* Type-generic all-ones: picks frac{64,128}_allones from the type of A. */
#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 981
 982 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 983 {
 984     return (a->frac == b->frac ? float_relation_equal
 985             : a->frac < b->frac ? float_relation_less
 986             : float_relation_greater);
 987 }
 988
 989 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 990 {
 991     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 992     if (ta == tb) {
 993         ta = a->frac_lo, tb = b->frac_lo;
 994         if (ta == tb) {
 995             return float_relation_equal;
 996         }
 997     }
 998     return ta < tb ? float_relation_less : float_relation_greater;
 999 }
1000
/* Type-generic fraction compare: picks frac{64,128}_cmp from the type of A. */
#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1002
/* Zero the 64-bit fraction; no other field of *a is touched. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}
1007
/* Zero both words of the 128-bit fraction; no other field is touched. */
static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}
1012
/* Type-generic fraction clear: picks frac{64,128}_clear from the type of A. */
#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1014
1015 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1016 {
1017     uint64_t n1, n0, r, q;
1018     bool ret;
1019
1020     /*
1021      * We want a 2*N / N-bit division to produce exactly an N-bit
1022      * result, so that we do not lose any precision and so that we
1023      * do not have to renormalize afterward.  If A.frac < B.frac,
1024      * then division would produce an (N-1)-bit result; shift A left
1025      * by one to produce the an N-bit result, and return true to
1026      * decrement the exponent to match.
1027      *
1028      * The udiv_qrnnd algorithm that we're using requires normalization,
1029      * i.e. the msb of the denominator must be set, which is already true.
1030      */
1031     ret = a->frac < b->frac;
1032     if (ret) {
1033         n0 = a->frac;
1034         n1 = 0;
1035     } else {
1036         n0 = a->frac >> 1;
1037         n1 = a->frac << 63;
1038     }
1039     q = udiv_qrnnd(&r, n0, n1, b->frac);
1040
1041     /* Set lsb if there is a remainder, to set inexact. */
1042     a->frac = q | (r != 0);
1043
1044     return ret;
1045 }
1046
/*
 * Divide the 128-bit fraction of a by that of b, leaving a 128-bit
 * quotient in a (frac_hi = first quotient word, frac_lo = second).
 *
 * Returns true when a's fraction is below b's (lt128); in that case the
 * dividend is used unshifted.  Otherwise the dividend is shifted right
 * by one bit first and false is returned.  This mirrors frac64_div,
 * where the true result tells the caller to decrement the exponent.
 *
 * Each quotient word starts as a 128/64 estimate against b's high word
 * only, and is then corrected downwards while the partial remainder's
 * top word is non-zero.  Any remainder left at the end sets the lsb of
 * the second quotient word as a sticky (inexact) bit.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        /* Dividend >= divisor: halve the dividend across both words. */
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
1093
/* Type-generic fraction divide: picks frac{64,128}_div from the type of A. */
#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1095
/* Return true iff the 64-bit fraction is zero. */
static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}
1100
/* Return true iff both words of the 128-bit fraction are zero. */
static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}
1105
/* Type-generic zero test: picks frac{64,128}_eqz from the type of A. */
#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1107
/*
 * Widening multiply: the product of the two 64-bit fractions is stored
 * in the 128-bit fraction of r (mulu64 receives the low-word pointer
 * first, then the high-word pointer).
 */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}
1112
/*
 * Widening multiply: the product of the two 128-bit fractions is stored
 * in the 256-bit fraction of r via mul128To256(), with the four result
 * words passed from frac_hi down to frac_lo.
 */
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}
1118
/*
 * Type-generic widening multiply.  Selection is on the type of the
 * operand A (not the wider result R): frac64_mulw or frac128_mulw.
 */
#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1120
/* Replace the 64-bit fraction by its negation (modulo 2^64). */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}
1125
1126 static void frac128_neg(FloatParts128 *a)
1127 {
1128     bool c = 0;
1129     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1130     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1131 }
1132
1133 static void frac256_neg(FloatParts256 *a)
1134 {
1135     bool c = 0;
1136     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1138     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1139     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1140 }
1141
/* Type-generic negate: picks frac{64,128,256}_neg from the type of A. */
#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1143
1144 static int frac64_normalize(FloatParts64 *a)
1145 {
1146     if (a->frac) {
1147         int shift = clz64(a->frac);
1148         a->frac <<= shift;
1149         return shift;
1150     }
1151     return 64;
1152 }
1153
1154 static int frac128_normalize(FloatParts128 *a)
1155 {
1156     if (a->frac_hi) {
1157         int shl = clz64(a->frac_hi);
1158         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1159         a->frac_lo <<= shl;
1160         return shl;
1161     } else if (a->frac_lo) {
1162         int shl = clz64(a->frac_lo);
1163         a->frac_hi = a->frac_lo << shl;
1164         a->frac_lo = 0;
1165         return shl + 64;
1166     }
1167     return 128;
1168 }
1169
1170 static int frac256_normalize(FloatParts256 *a)
1171 {
1172     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1173     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1174     int ret, shl;
1175
1176     if (likely(a0)) {
1177         shl = clz64(a0);
1178         if (shl == 0) {
1179             return 0;
1180         }
1181         ret = shl;
1182     } else {
1183         if (a1) {
1184             ret = 64;
1185             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1186         } else if (a2) {
1187             ret = 128;
1188             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1189         } else if (a3) {
1190             ret = 192;
1191             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1192         } else {
1193             ret = 256;
1194             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1195             goto done;
1196         }
1197         shl = clz64(a0);
1198         if (shl == 0) {
1199             goto done;
1200         }
1201         ret += shl;
1202     }
1203
1204     a0 = shl_double(a0, a1, shl);
1205     a1 = shl_double(a1, a2, shl);
1206     a2 = shl_double(a2, a3, shl);
1207     a3 <<= shl;
1208
1209  done:
1210     a->frac_hi = a0;
1211     a->frac_hm = a1;
1212     a->frac_lm = a2;
1213     a->frac_lo = a3;
1214     return ret;
1215 }
1216
/* Type-generic normalize: picks frac{64,128,256}_normalize from the type of A. */
#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1218
/*
 * Remainder step on the fraction/exponent of a with respect to b; the
 * result replaces a's frac, exp and possibly sign/cls.
 *
 * If a->exp is more than one below b->exp, a is returned untouched (and
 * *mod_quot, when requested, is set to 0).  Otherwise multiples of
 * b->frac are subtracted from the 128-bit working value a0:a1 while the
 * quotient is accumulated in 'quot' (modulo 2^64, since it is shifted
 * left on every round).
 *
 * mod_quot != NULL: the accumulated quotient is stored there and the
 *                   remainder is kept as computed.
 * mod_quot == NULL: if (scaled divisor - remainder) is below the
 *                   remainder, or equal to it while the last quotient
 *                   word q is odd, that difference replaces the remainder
 *                   and a->sign is flipped.
 *
 * Finally the remainder is renormalized into a->frac / a->exp with a
 * sticky lsb; a zero remainder sets a->cls to float_class_zero instead.
 */
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        /* a is left as is; only the quotient output is written. */
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    /* First quotient bit: one subtraction if the divisor fits. */
    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    /* Produce 62 quotient bits per round while more than 64 remain. */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    /* Final partial round for the remaining exp_diff (1..64) bits. */
    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        /* t0:t1 = divisor scaled to the remainder's position. */
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        /* Correct the under-estimated q until remainder < divisor. */
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        /* Guard the shift: exp_diff may be exactly 64 here. */
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /* t = divisor - remainder; pick whichever of the two is smaller. */
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    /* Renormalize the 128-bit remainder so that the top bit of a0 is set. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    /* Fold any bits left in the low word into a sticky lsb. */
    a->frac = a0 | (a1 != 0);
}
1301
/*
 * 128-bit counterpart of frac64_modrem: remainder step on the
 * fraction/exponent of a with respect to b, result left in a.
 *
 * If a->exp is more than one below b->exp, a is returned untouched (and
 * *mod_quot, when requested, is set to 0).  Otherwise multiples of b's
 * fraction are subtracted from the 192-bit working value a0:a1:a2 while
 * the quotient is accumulated in 'quot' (modulo 2^64).
 *
 * mod_quot != NULL: the accumulated quotient is stored there and the
 *                   remainder is kept as computed.
 * mod_quot == NULL: if (scaled divisor - remainder) is below the
 *                   remainder, or equal to it while the last quotient
 *                   word q is odd, that difference replaces the remainder
 *                   and a->sign is flipped.
 *
 * Finally the remainder is renormalized into frac_hi:frac_lo / exp with
 * a sticky lsb; a zero remainder sets a->cls to float_class_zero instead.
 */
static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        /* a is left as is; only the quotient output is written. */
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    /* First quotient bit: one subtraction if the divisor fits. */
    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    /* Produce 61 quotient bits per round while more than 64 remain. */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    /* Final partial round for the remaining exp_diff (1..64) bits. */
    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        /* t0:t1:t2 = divisor scaled to the remainder's position. */
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        /* Correct the under-estimated q until remainder < divisor. */
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        /* Guard the shift: exp_diff may be exactly 64 here. */
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /* t = divisor - remainder; pick whichever of the two is smaller. */
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    /* Renormalize the 192-bit remainder so that the top bit of a0 is set. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    /* Fold any bits left in the third word into a sticky lsb. */
    a->frac_lo = a1 | (a2 != 0);
}
1396
/* Type-generic modrem: picks frac{64,128}_modrem from the type of A. */
#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1398
/*
 * Shift the 64-bit fraction left by c bits.  No range check is made on
 * c; the caller must keep it within 0..63.
 */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}
1403
1404 static void frac128_shl(FloatParts128 *a, int c)
1405 {
1406     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1407
1408     if (c & 64) {
1409         a0 = a1, a1 = 0;
1410     }
1411
1412     c &= 63;
1413     if (c) {
1414         a0 = shl_double(a0, a1, c);
1415         a1 = a1 << c;
1416     }
1417
1418     a->frac_hi = a0;
1419     a->frac_lo = a1;
1420 }
1421
/* Type-generic left shift: picks frac{64,128}_shl from the type of A. */
#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1423
/*
 * Shift the 64-bit fraction right by c bits; shifted-out bits are
 * dropped (no sticky bit -- see frac64_shrjam for that).  No range check
 * is made on c; the caller must keep it within 0..63.
 */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}
1428
1429 static void frac128_shr(FloatParts128 *a, int c)
1430 {
1431     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1432
1433     if (c & 64) {
1434         a1 = a0, a0 = 0;
1435     }
1436
1437     c &= 63;
1438     if (c) {
1439         a1 = shr_double(a0, a1, c);
1440         a0 = a0 >> c;
1441     }
1442
1443     a->frac_hi = a0;
1444     a->frac_lo = a1;
1445 }
1446
/* Type-generic right shift: picks frac{64,128}_shr from the type of A. */
#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1448
1449 static void frac64_shrjam(FloatParts64 *a, int c)
1450 {
1451     uint64_t a0 = a->frac;
1452
1453     if (likely(c != 0)) {
1454         if (likely(c < 64)) {
1455             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1456         } else {
1457             a0 = a0 != 0;
1458         }
1459         a->frac = a0;
1460     }
1461 }
1462
1463 static void frac128_shrjam(FloatParts128 *a, int c)
1464 {
1465     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1466     uint64_t sticky = 0;
1467
1468     if (unlikely(c == 0)) {
1469         return;
1470     } else if (likely(c < 64)) {
1471         /* nothing */
1472     } else if (likely(c < 128)) {
1473         sticky = a1;
1474         a1 = a0;
1475         a0 = 0;
1476         c &= 63;
1477         if (c == 0) {
1478             goto done;
1479         }
1480     } else {
1481         sticky = a0 | a1;
1482         a0 = a1 = 0;
1483         goto done;
1484     }
1485
1486     sticky |= shr_double(a1, 0, c);
1487     a1 = shr_double(a0, a1, c);
1488     a0 = a0 >> c;
1489
1490  done:
1491     a->frac_lo = a1 | (sticky != 0);
1492     a->frac_hi = a0;
1493 }
1494
1495 static void frac256_shrjam(FloatParts256 *a, int c)
1496 {
1497     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1498     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1499     uint64_t sticky = 0;
1500
1501     if (unlikely(c == 0)) {
1502         return;
1503     } else if (likely(c < 64)) {
1504         /* nothing */
1505     } else if (likely(c < 256)) {
1506         if (unlikely(c & 128)) {
1507             sticky |= a2 | a3;
1508             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1509         }
1510         if (unlikely(c & 64)) {
1511             sticky |= a3;
1512             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1513         }
1514         c &= 63;
1515         if (c == 0) {
1516             goto done;
1517         }
1518     } else {
1519         sticky = a0 | a1 | a2 | a3;
1520         a0 = a1 = a2 = a3 = 0;
1521         goto done;
1522     }
1523
1524     sticky |= shr_double(a3, 0, c);
1525     a3 = shr_double(a2, a3, c);
1526     a2 = shr_double(a1, a2, c);
1527     a1 = shr_double(a0, a1, c);
1528     a0 = a0 >> c;
1529
1530  done:
1531     a->frac_lo = a3 | (sticky != 0);
1532     a->frac_lm = a2;
1533     a->frac_hm = a1;
1534     a->frac_hi = a0;
1535 }
1536
/* Type-generic shift-right-and-jam: picks frac{64,128,256}_shrjam from A. */
#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1538
/*
 * r->frac = a->frac - b->frac.
 * Returns the flag reported by usub64_overflow() for that subtraction
 * (the borrow out of the 64-bit difference).
 */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}
1543
1544 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1545 {
1546     bool c = 0;
1547     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1548     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1549     return c;
1550 }
1551
1552 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1553 {
1554     bool c = 0;
1555     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1556     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1557     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1558     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1559     return c;
1560 }
1561
/* Type-generic subtract: picks frac{64,128,256}_sub from the type of R. */
#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1563
/*
 * Narrow a 128-bit fraction to 64 bits: keep the high word and set the
 * lsb when the discarded low word is non-zero (sticky bit).
 */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}
1568
/*
 * Narrow a 256-bit fraction to 128 bits: keep the two upper words
 * (frac_hi, frac_hm) and set the lsb of the result when either of the
 * two discarded lower words is non-zero (sticky bit).
 */
static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}
1574
/*
 * Type-generic truncate-and-jam.  Selection is on the type of the
 * narrower destination R: frac64_truncjam or frac128_truncjam.
 */
#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1576
/*
 * Widen a 64-bit fraction to 128 bits: the value goes into the high
 * word and the low word is zeroed.  Only the fraction words are written.
 */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}
1582
/*
 * Widen a 128-bit fraction to 256 bits: the two source words go into
 * the two upper words (frac_hi, frac_hm) and the two lower words are
 * zeroed.  Only the fraction words are written.
 */
static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}
1590
/*
 * Type-generic widen.  Note the argument roles: A is the wider
 * destination and B the source; selection is on the type of B.
 */
#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1592
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa, which
 * together form the 7-bit index into the 128 entries.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1616
/*
 * Instantiate the templated implementation files once per fraction width.
 * The templates are parameterized by N (operand width) and W (the wider
 * companion width); partsN(NAME) builds the name parts<N>_NAME, and
 * FloatPartsN / FloatPartsW name the matching structure types.
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

/* 64-bit operations, widening to 128 bits. */
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* 128-bit operations, widening to 256 bits. */
#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* 256-bit: only the add/sub template is instantiated; W is left undefined. */
#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

/* Drop the template parameters and helper macros again. */
#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1646
1647 /*
1648  * Pack/unpack routines with a specific FloatFmt.
1649  */
1650
/*
 * Unpack the float16 value f into *p with float16_unpack_raw(), then
 * canonicalize *p against the caller-supplied format description
 * 'params' (this is what lets callers pick a float16 variant).
 */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}
1657
/* As float16a_unpack_canonical(), fixed to the float16_params format. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}
1663
/*
 * Unpack the bfloat16 value f into *p with bfloat16_unpack_raw(), then
 * canonicalize *p against bfloat16_params.
 */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}
1670
/*
 * Convert the canonical parts in *p back to the caller-supplied format
 * 'params' via parts_uncanon() (which modifies *p in place), then pack
 * the result into a float16.
 */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}
1678
/* As float16a_round_pack_canonical(), fixed to the float16_params format. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}
1684
/*
 * Convert the canonical parts in *p back to bfloat16_params via
 * parts_uncanon() (which modifies *p in place), then pack the result
 * into a bfloat16.
 */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1691
/*
 * Unpack the float32 value f into *p with float32_unpack_raw(), then
 * canonicalize *p against float32_params.
 */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}
1698
/*
 * Convert the canonical parts in *p back to float32_params via
 * parts_uncanon() (which modifies *p in place), then pack the result
 * into a float32.
 */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}
1705
/*
 * Unpack the float64 value f into *p with float64_unpack_raw(), then
 * canonicalize *p against float64_params.
 */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}
1712
/*
 * Convert the canonical parts in *p back to float64_params via
 * parts_uncanon() (which modifies *p in place), then pack the result
 * into a float64.
 */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1719
1720 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1721                                                float_status *s)
1722 {
1723     parts_uncanon(p, s, &float32_params);
1724
1725     /*
1726      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1727      * We need to adjust the fraction higher so that the least N bits are
1728      * zero, and the fraction is adjacent to the float64 implicit bit.
1729      */
1730     switch (p->cls) {
1731     case float_class_normal:
1732         if (unlikely(p->exp == 0)) {
1733             /*
1734              * The result is denormal for float32, but can be represented
1735              * in normalized form for float64.  Adjust, per canonicalize.
1736              */
1737             int shift = frac_normalize(p);
1738             p->exp = (float32_params.frac_shift -
1739                       float32_params.exp_bias - shift + 1 +
1740                       float64_params.exp_bias);
1741             frac_shr(p, float64_params.frac_shift);
1742         } else {
1743             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1744             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1745         }
1746         break;
1747     case float_class_snan:
1748     case float_class_qnan:
1749         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1750         p->exp = float64_params.exp_max;
1751         break;
1752     case float_class_inf:
1753         p->exp = float64_params.exp_max;
1754         break;
1755     case float_class_zero:
1756         break;
1757     default:
1758         g_assert_not_reached();
1759     }
1760
1761     return float64_pack_raw(p);
1762 }
1763
1764 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1765                                       float_status *s)
1766 {
1767     float128_unpack_raw(p, f);
1768     parts_canonicalize(p, s, &float128_params);
1769 }
1770
1771 static float128 float128_round_pack_canonical(FloatParts128 *p,
1772                                               float_status *s)
1773 {
1774     parts_uncanon(p, s, &float128_params);
1775     return float128_pack_raw(p);
1776 }
1777
1778 /* Returns false if the encoding is invalid. */
1779 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1780                                       float_status *s)
1781 {
1782     /* Ensure rounding precision is set before beginning. */
1783     switch (s->floatx80_rounding_precision) {
1784     case floatx80_precision_x:
1785     case floatx80_precision_d:
1786     case floatx80_precision_s:
1787         break;
1788     default:
1789         g_assert_not_reached();
1790     }
1791
1792     if (unlikely(floatx80_invalid_encoding(f))) {
1793         float_raise(float_flag_invalid, s);
1794         return false;
1795     }
1796
1797     floatx80_unpack_raw(p, f);
1798
1799     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1800         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1801     } else {
1802         /* The explicit integer bit is ignored, after invalid checks. */
1803         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1804         p->cls = (p->frac_hi == 0 ? float_class_inf
1805                   : parts_is_snan_frac(p->frac_hi, s)
1806                   ? float_class_snan : float_class_qnan);
1807     }
1808     return true;
1809 }
1810
1811 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1812                                               float_status *s)
1813 {
1814     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1815     uint64_t frac;
1816     int exp;
1817
1818     switch (p->cls) {
1819     case float_class_normal:
1820         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1821             parts_uncanon_normal(p, s, fmt);
1822             frac = p->frac_hi;
1823             exp = p->exp;
1824         } else {
1825             FloatParts64 p64;
1826
1827             p64.sign = p->sign;
1828             p64.exp = p->exp;
1829             frac_truncjam(&p64, p);
1830             parts_uncanon_normal(&p64, s, fmt);
1831             frac = p64.frac;
1832             exp = p64.exp;
1833         }
1834         if (exp != fmt->exp_max) {
1835             break;
1836         }
1837         /* rounded to inf -- fall through to set frac correctly */
1838
1839     case float_class_inf:
1840         /* x86 and m68k differ in the setting of the integer bit. */
1841         frac = floatx80_infinity_low;
1842         exp = fmt->exp_max;
1843         break;
1844
1845     case float_class_zero:
1846         frac = 0;
1847         exp = 0;
1848         break;
1849
1850     case float_class_snan:
1851     case float_class_qnan:
1852         /* NaNs have the integer bit set. */
1853         frac = p->frac_hi | (1ull << 63);
1854         exp = fmt->exp_max;
1855         break;
1856
1857     default:
1858         g_assert_not_reached();
1859     }
1860
1861     return packFloatx80(p->sign, exp, frac);
1862 }
1863
1864 /*
1865  * Addition and subtraction
1866  */
1867
1868 static float16 QEMU_FLATTEN
1869 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1870 {
1871     FloatParts64 pa, pb, *pr;
1872
1873     float16_unpack_canonical(&pa, a, status);
1874     float16_unpack_canonical(&pb, b, status);
1875     pr = parts_addsub(&pa, &pb, status, subtract);
1876
1877     return float16_round_pack_canonical(pr, status);
1878 }
1879
1880 float16 float16_add(float16 a, float16 b, float_status *status)
1881 {
1882     return float16_addsub(a, b, status, false);
1883 }
1884
1885 float16 float16_sub(float16 a, float16 b, float_status *status)
1886 {
1887     return float16_addsub(a, b, status, true);
1888 }
1889
1890 static float32 QEMU_SOFTFLOAT_ATTR
1891 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1892 {
1893     FloatParts64 pa, pb, *pr;
1894
1895     float32_unpack_canonical(&pa, a, status);
1896     float32_unpack_canonical(&pb, b, status);
1897     pr = parts_addsub(&pa, &pb, status, subtract);
1898
1899     return float32_round_pack_canonical(pr, status);
1900 }
1901
1902 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1903 {
1904     return soft_f32_addsub(a, b, status, false);
1905 }
1906
1907 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1908 {
1909     return soft_f32_addsub(a, b, status, true);
1910 }
1911
1912 static float64 QEMU_SOFTFLOAT_ATTR
1913 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1914 {
1915     FloatParts64 pa, pb, *pr;
1916
1917     float64_unpack_canonical(&pa, a, status);
1918     float64_unpack_canonical(&pb, b, status);
1919     pr = parts_addsub(&pa, &pb, status, subtract);
1920
1921     return float64_round_pack_canonical(pr, status);
1922 }
1923
1924 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1925 {
1926     return soft_f64_addsub(a, b, status, false);
1927 }
1928
1929 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1930 {
1931     return soft_f64_addsub(a, b, status, true);
1932 }
1933
/* Native single-precision addition: returns a + b. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}
1938
/* Native single-precision subtraction: returns a - b. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}
1943
/* Native double-precision addition: returns a + b. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}
1948
/* Native double-precision subtraction: returns a - b. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1953
1954 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1955 {
1956     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1957         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1958     }
1959     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1960 }
1961
1962 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1963 {
1964     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1965         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1966     } else {
1967         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1968     }
1969 }
1970
1971 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1972                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1973 {
1974     return float32_gen2(a, b, s, hard, soft,
1975                         f32_is_zon2, f32_addsubmul_post);
1976 }
1977
1978 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1979                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1980 {
1981     return float64_gen2(a, b, s, hard, soft,
1982                         f64_is_zon2, f64_addsubmul_post);
1983 }
1984
1985 float32 QEMU_FLATTEN
1986 float32_add(float32 a, float32 b, float_status *s)
1987 {
1988     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1989 }
1990
1991 float32 QEMU_FLATTEN
1992 float32_sub(float32 a, float32 b, float_status *s)
1993 {
1994     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1995 }
1996
1997 float64 QEMU_FLATTEN
1998 float64_add(float64 a, float64 b, float_status *s)
1999 {
2000     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2001 }
2002
2003 float64 QEMU_FLATTEN
2004 float64_sub(float64 a, float64 b, float_status *s)
2005 {
2006     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2007 }
2008
2009 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2010                                  bool subtract)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013
2014     float64_unpack_canonical(&pa, a, status);
2015     float64_unpack_canonical(&pb, b, status);
2016     pr = parts_addsub(&pa, &pb, status, subtract);
2017
2018     return float64r32_round_pack_canonical(pr, status);
2019 }
2020
2021 float64 float64r32_add(float64 a, float64 b, float_status *status)
2022 {
2023     return float64r32_addsub(a, b, status, false);
2024 }
2025
2026 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2027 {
2028     return float64r32_addsub(a, b, status, true);
2029 }
2030
2031 static bfloat16 QEMU_FLATTEN
2032 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2033 {
2034     FloatParts64 pa, pb, *pr;
2035
2036     bfloat16_unpack_canonical(&pa, a, status);
2037     bfloat16_unpack_canonical(&pb, b, status);
2038     pr = parts_addsub(&pa, &pb, status, subtract);
2039
2040     return bfloat16_round_pack_canonical(pr, status);
2041 }
2042
2043 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2044 {
2045     return bfloat16_addsub(a, b, status, false);
2046 }
2047
2048 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2049 {
2050     return bfloat16_addsub(a, b, status, true);
2051 }
2052
2053 static float128 QEMU_FLATTEN
2054 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2055 {
2056     FloatParts128 pa, pb, *pr;
2057
2058     float128_unpack_canonical(&pa, a, status);
2059     float128_unpack_canonical(&pb, b, status);
2060     pr = parts_addsub(&pa, &pb, status, subtract);
2061
2062     return float128_round_pack_canonical(pr, status);
2063 }
2064
2065 float128 float128_add(float128 a, float128 b, float_status *status)
2066 {
2067     return float128_addsub(a, b, status, false);
2068 }
2069
2070 float128 float128_sub(float128 a, float128 b, float_status *status)
2071 {
2072     return float128_addsub(a, b, status, true);
2073 }
2074
2075 static floatx80 QEMU_FLATTEN
2076 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2077 {
2078     FloatParts128 pa, pb, *pr;
2079
2080     if (!floatx80_unpack_canonical(&pa, a, status) ||
2081         !floatx80_unpack_canonical(&pb, b, status)) {
2082         return floatx80_default_nan(status);
2083     }
2084
2085     pr = parts_addsub(&pa, &pb, status, subtract);
2086     return floatx80_round_pack_canonical(pr, status);
2087 }
2088
2089 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2090 {
2091     return floatx80_addsub(a, b, status, false);
2092 }
2093
2094 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2095 {
2096     return floatx80_addsub(a, b, status, true);
2097 }
2098
2099 /*
2100  * Multiplication
2101  */
2102
2103 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2104 {
2105     FloatParts64 pa, pb, *pr;
2106
2107     float16_unpack_canonical(&pa, a, status);
2108     float16_unpack_canonical(&pb, b, status);
2109     pr = parts_mul(&pa, &pb, status);
2110
2111     return float16_round_pack_canonical(pr, status);
2112 }
2113
2114 static float32 QEMU_SOFTFLOAT_ATTR
2115 soft_f32_mul(float32 a, float32 b, float_status *status)
2116 {
2117     FloatParts64 pa, pb, *pr;
2118
2119     float32_unpack_canonical(&pa, a, status);
2120     float32_unpack_canonical(&pb, b, status);
2121     pr = parts_mul(&pa, &pb, status);
2122
2123     return float32_round_pack_canonical(pr, status);
2124 }
2125
2126 static float64 QEMU_SOFTFLOAT_ATTR
2127 soft_f64_mul(float64 a, float64 b, float_status *status)
2128 {
2129     FloatParts64 pa, pb, *pr;
2130
2131     float64_unpack_canonical(&pa, a, status);
2132     float64_unpack_canonical(&pb, b, status);
2133     pr = parts_mul(&pa, &pb, status);
2134
2135     return float64_round_pack_canonical(pr, status);
2136 }
2137
/* Native single-precision multiplication: returns a * b. */
static float hard_f32_mul(float a, float b)
{
    const float product = a * b;

    return product;
}
2142
/* Native double-precision multiplication: returns a * b. */
static double hard_f64_mul(double a, double b)
{
    const double product = a * b;

    return product;
}
2147
2148 float32 QEMU_FLATTEN
2149 float32_mul(float32 a, float32 b, float_status *s)
2150 {
2151     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2152                         f32_is_zon2, f32_addsubmul_post);
2153 }
2154
2155 float64 QEMU_FLATTEN
2156 float64_mul(float64 a, float64 b, float_status *s)
2157 {
2158     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2159                         f64_is_zon2, f64_addsubmul_post);
2160 }
2161
2162 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2163 {
2164     FloatParts64 pa, pb, *pr;
2165
2166     float64_unpack_canonical(&pa, a, status);
2167     float64_unpack_canonical(&pb, b, status);
2168     pr = parts_mul(&pa, &pb, status);
2169
2170     return float64r32_round_pack_canonical(pr, status);
2171 }
2172
2173 bfloat16 QEMU_FLATTEN
2174 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2175 {
2176     FloatParts64 pa, pb, *pr;
2177
2178     bfloat16_unpack_canonical(&pa, a, status);
2179     bfloat16_unpack_canonical(&pb, b, status);
2180     pr = parts_mul(&pa, &pb, status);
2181
2182     return bfloat16_round_pack_canonical(pr, status);
2183 }
2184
2185 float128 QEMU_FLATTEN
2186 float128_mul(float128 a, float128 b, float_status *status)
2187 {
2188     FloatParts128 pa, pb, *pr;
2189
2190     float128_unpack_canonical(&pa, a, status);
2191     float128_unpack_canonical(&pb, b, status);
2192     pr = parts_mul(&pa, &pb, status);
2193
2194     return float128_round_pack_canonical(pr, status);
2195 }
2196
2197 floatx80 QEMU_FLATTEN
2198 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2199 {
2200     FloatParts128 pa, pb, *pr;
2201
2202     if (!floatx80_unpack_canonical(&pa, a, status) ||
2203         !floatx80_unpack_canonical(&pb, b, status)) {
2204         return floatx80_default_nan(status);
2205     }
2206
2207     pr = parts_mul(&pa, &pb, status);
2208     return floatx80_round_pack_canonical(pr, status);
2209 }
2210
2211 /*
2212  * Fused multiply-add
2213  */
2214
2215 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2216                                     int flags, float_status *status)
2217 {
2218     FloatParts64 pa, pb, pc, *pr;
2219
2220     float16_unpack_canonical(&pa, a, status);
2221     float16_unpack_canonical(&pb, b, status);
2222     float16_unpack_canonical(&pc, c, status);
2223     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2224
2225     return float16_round_pack_canonical(pr, status);
2226 }
2227
2228 static float32 QEMU_SOFTFLOAT_ATTR
2229 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2230                 float_status *status)
2231 {
2232     FloatParts64 pa, pb, pc, *pr;
2233
2234     float32_unpack_canonical(&pa, a, status);
2235     float32_unpack_canonical(&pb, b, status);
2236     float32_unpack_canonical(&pc, c, status);
2237     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2238
2239     return float32_round_pack_canonical(pr, status);
2240 }
2241
2242 static float64 QEMU_SOFTFLOAT_ATTR
2243 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2244                 float_status *status)
2245 {
2246     FloatParts64 pa, pb, pc, *pr;
2247
2248     float64_unpack_canonical(&pa, a, status);
2249     float64_unpack_canonical(&pb, b, status);
2250     float64_unpack_canonical(&pc, c, status);
2251     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2252
2253     return float64_round_pack_canonical(pr, status);
2254 }
2255
/*
 * When true, float32_muladd() and float64_muladd() skip the host fmaf()/
 * fma() path and always call soft_f32_muladd()/soft_f64_muladd().
 * NOTE(review): the code that sets this flag is not visible in this
 * part of the file.
 */
static bool force_soft_fma;
2257
2258 float32 QEMU_FLATTEN
2259 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2260 {
2261     union_float32 ua, ub, uc, ur;
2262
2263     ua.s = xa;
2264     ub.s = xb;
2265     uc.s = xc;
2266
2267     if (unlikely(!can_use_fpu(s))) {
2268         goto soft;
2269     }
2270     if (unlikely(flags & float_muladd_halve_result)) {
2271         goto soft;
2272     }
2273
2274     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2275     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2276         goto soft;
2277     }
2278
2279     if (unlikely(force_soft_fma)) {
2280         goto soft;
2281     }
2282
2283     /*
2284      * When (a || b) == 0, there's no need to check for under/over flow,
2285      * since we know the addend is (normal || 0) and the product is 0.
2286      */
2287     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2288         union_float32 up;
2289         bool prod_sign;
2290
2291         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2292         prod_sign ^= !!(flags & float_muladd_negate_product);
2293         up.s = float32_set_sign(float32_zero, prod_sign);
2294
2295         if (flags & float_muladd_negate_c) {
2296             uc.h = -uc.h;
2297         }
2298         ur.h = up.h + uc.h;
2299     } else {
2300         union_float32 ua_orig = ua;
2301         union_float32 uc_orig = uc;
2302
2303         if (flags & float_muladd_negate_product) {
2304             ua.h = -ua.h;
2305         }
2306         if (flags & float_muladd_negate_c) {
2307             uc.h = -uc.h;
2308         }
2309
2310         ur.h = fmaf(ua.h, ub.h, uc.h);
2311
2312         if (unlikely(f32_is_inf(ur))) {
2313             float_raise(float_flag_overflow, s);
2314         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2315             ua = ua_orig;
2316             uc = uc_orig;
2317             goto soft;
2318         }
2319     }
2320     if (flags & float_muladd_negate_result) {
2321         return float32_chs(ur.s);
2322     }
2323     return ur.s;
2324
2325  soft:
2326     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2327 }
2328
2329 float64 QEMU_FLATTEN
2330 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2331 {
2332     union_float64 ua, ub, uc, ur;
2333
2334     ua.s = xa;
2335     ub.s = xb;
2336     uc.s = xc;
2337
2338     if (unlikely(!can_use_fpu(s))) {
2339         goto soft;
2340     }
2341     if (unlikely(flags & float_muladd_halve_result)) {
2342         goto soft;
2343     }
2344
2345     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2346     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2347         goto soft;
2348     }
2349
2350     if (unlikely(force_soft_fma)) {
2351         goto soft;
2352     }
2353
2354     /*
2355      * When (a || b) == 0, there's no need to check for under/over flow,
2356      * since we know the addend is (normal || 0) and the product is 0.
2357      */
2358     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2359         union_float64 up;
2360         bool prod_sign;
2361
2362         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2363         prod_sign ^= !!(flags & float_muladd_negate_product);
2364         up.s = float64_set_sign(float64_zero, prod_sign);
2365
2366         if (flags & float_muladd_negate_c) {
2367             uc.h = -uc.h;
2368         }
2369         ur.h = up.h + uc.h;
2370     } else {
2371         union_float64 ua_orig = ua;
2372         union_float64 uc_orig = uc;
2373
2374         if (flags & float_muladd_negate_product) {
2375             ua.h = -ua.h;
2376         }
2377         if (flags & float_muladd_negate_c) {
2378             uc.h = -uc.h;
2379         }
2380
2381         ur.h = fma(ua.h, ub.h, uc.h);
2382
2383         if (unlikely(f64_is_inf(ur))) {
2384             float_raise(float_flag_overflow, s);
2385         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2386             ua = ua_orig;
2387             uc = uc_orig;
2388             goto soft;
2389         }
2390     }
2391     if (flags & float_muladd_negate_result) {
2392         return float64_chs(ur.s);
2393     }
2394     return ur.s;
2395
2396  soft:
2397     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2398 }
2399
2400 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2401                           int flags, float_status *status)
2402 {
2403     FloatParts64 pa, pb, pc, *pr;
2404
2405     float64_unpack_canonical(&pa, a, status);
2406     float64_unpack_canonical(&pb, b, status);
2407     float64_unpack_canonical(&pc, c, status);
2408     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2409
2410     return float64r32_round_pack_canonical(pr, status);
2411 }
2412
2413 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2414                                       int flags, float_status *status)
2415 {
2416     FloatParts64 pa, pb, pc, *pr;
2417
2418     bfloat16_unpack_canonical(&pa, a, status);
2419     bfloat16_unpack_canonical(&pb, b, status);
2420     bfloat16_unpack_canonical(&pc, c, status);
2421     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2422
2423     return bfloat16_round_pack_canonical(pr, status);
2424 }
2425
2426 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2427                                       int flags, float_status *status)
2428 {
2429     FloatParts128 pa, pb, pc, *pr;
2430
2431     float128_unpack_canonical(&pa, a, status);
2432     float128_unpack_canonical(&pb, b, status);
2433     float128_unpack_canonical(&pc, c, status);
2434     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2435
2436     return float128_round_pack_canonical(pr, status);
2437 }
2438
2439 /*
2440  * Division
2441  */
2442
2443 float16 float16_div(float16 a, float16 b, float_status *status)
2444 {
2445     FloatParts64 pa, pb, *pr;
2446
2447     float16_unpack_canonical(&pa, a, status);
2448     float16_unpack_canonical(&pb, b, status);
2449     pr = parts_div(&pa, &pb, status);
2450
2451     return float16_round_pack_canonical(pr, status);
2452 }
2453
2454 static float32 QEMU_SOFTFLOAT_ATTR
2455 soft_f32_div(float32 a, float32 b, float_status *status)
2456 {
2457     FloatParts64 pa, pb, *pr;
2458
2459     float32_unpack_canonical(&pa, a, status);
2460     float32_unpack_canonical(&pb, b, status);
2461     pr = parts_div(&pa, &pb, status);
2462
2463     return float32_round_pack_canonical(pr, status);
2464 }
2465
2466 static float64 QEMU_SOFTFLOAT_ATTR
2467 soft_f64_div(float64 a, float64 b, float_status *status)
2468 {
2469     FloatParts64 pa, pb, *pr;
2470
2471     float64_unpack_canonical(&pa, a, status);
2472     float64_unpack_canonical(&pb, b, status);
2473     pr = parts_div(&pa, &pb, status);
2474
2475     return float64_round_pack_canonical(pr, status);
2476 }
2477
/* Native single-precision division: returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
2482
/* Native double-precision division: returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2487
2488 static bool f32_div_pre(union_float32 a, union_float32 b)
2489 {
2490     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2491         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2492                fpclassify(b.h) == FP_NORMAL;
2493     }
2494     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2495 }
2496
2497 static bool f64_div_pre(union_float64 a, union_float64 b)
2498 {
2499     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2500         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2501                fpclassify(b.h) == FP_NORMAL;
2502     }
2503     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2504 }
2505
2506 static bool f32_div_post(union_float32 a, union_float32 b)
2507 {
2508     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2509         return fpclassify(a.h) != FP_ZERO;
2510     }
2511     return !float32_is_zero(a.s);
2512 }
2513
2514 static bool f64_div_post(union_float64 a, union_float64 b)
2515 {
2516     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2517         return fpclassify(a.h) != FP_ZERO;
2518     }
2519     return !float64_is_zero(a.s);
2520 }
2521
2522 float32 QEMU_FLATTEN
2523 float32_div(float32 a, float32 b, float_status *s)
2524 {
2525     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2526                         f32_div_pre, f32_div_post);
2527 }
2528
2529 float64 QEMU_FLATTEN
2530 float64_div(float64 a, float64 b, float_status *s)
2531 {
2532     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2533                         f64_div_pre, f64_div_post);
2534 }
2535
2536 float64 float64r32_div(float64 a, float64 b, float_status *status)
2537 {
2538     FloatParts64 pa, pb, *pr;
2539
2540     float64_unpack_canonical(&pa, a, status);
2541     float64_unpack_canonical(&pb, b, status);
2542     pr = parts_div(&pa, &pb, status);
2543
2544     return float64r32_round_pack_canonical(pr, status);
2545 }
2546
2547 bfloat16 QEMU_FLATTEN
2548 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2549 {
2550     FloatParts64 pa, pb, *pr;
2551
2552     bfloat16_unpack_canonical(&pa, a, status);
2553     bfloat16_unpack_canonical(&pb, b, status);
2554     pr = parts_div(&pa, &pb, status);
2555
2556     return bfloat16_round_pack_canonical(pr, status);
2557 }
2558
2559 float128 QEMU_FLATTEN
2560 float128_div(float128 a, float128 b, float_status *status)
2561 {
2562     FloatParts128 pa, pb, *pr;
2563
2564     float128_unpack_canonical(&pa, a, status);
2565     float128_unpack_canonical(&pb, b, status);
2566     pr = parts_div(&pa, &pb, status);
2567
2568     return float128_round_pack_canonical(pr, status);
2569 }
2570
2571 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2572 {
2573     FloatParts128 pa, pb, *pr;
2574
2575     if (!floatx80_unpack_canonical(&pa, a, status) ||
2576         !floatx80_unpack_canonical(&pb, b, status)) {
2577         return floatx80_default_nan(status);
2578     }
2579
2580     pr = parts_div(&pa, &pb, status);
2581     return floatx80_round_pack_canonical(pr, status);
2582 }
2583
2584 /*
2585  * Remainder
2586  */
2587
2588 float32 float32_rem(float32 a, float32 b, float_status *status)
2589 {
2590     FloatParts64 pa, pb, *pr;
2591
2592     float32_unpack_canonical(&pa, a, status);
2593     float32_unpack_canonical(&pb, b, status);
2594     pr = parts_modrem(&pa, &pb, NULL, status);
2595
2596     return float32_round_pack_canonical(pr, status);
2597 }
2598
2599 float64 float64_rem(float64 a, float64 b, float_status *status)
2600 {
2601     FloatParts64 pa, pb, *pr;
2602
2603     float64_unpack_canonical(&pa, a, status);
2604     float64_unpack_canonical(&pb, b, status);
2605     pr = parts_modrem(&pa, &pb, NULL, status);
2606
2607     return float64_round_pack_canonical(pr, status);
2608 }
2609
2610 float128 float128_rem(float128 a, float128 b, float_status *status)
2611 {
2612     FloatParts128 pa, pb, *pr;
2613
2614     float128_unpack_canonical(&pa, a, status);
2615     float128_unpack_canonical(&pb, b, status);
2616     pr = parts_modrem(&pa, &pb, NULL, status);
2617
2618     return float128_round_pack_canonical(pr, status);
2619 }
2620
2621 /*
2622  * Returns the remainder of the extended double-precision floating-point value
2623  * `a' with respect to the corresponding value `b'.
2624  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2625  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2626  * the remainder based on truncating the quotient toward zero instead and
2627  * *quotient is set to the low 64 bits of the absolute value of the integer
2628  * quotient.
2629  */
2630 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2631                          uint64_t *quotient, float_status *status)
2632 {
2633     FloatParts128 pa, pb, *pr;
2634
2635     *quotient = 0;
2636     if (!floatx80_unpack_canonical(&pa, a, status) ||
2637         !floatx80_unpack_canonical(&pb, b, status)) {
2638         return floatx80_default_nan(status);
2639     }
2640     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2641
2642     return floatx80_round_pack_canonical(pr, status);
2643 }
2644
2645 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2646 {
2647     uint64_t quotient;
2648     return floatx80_modrem(a, b, false, &quotient, status);
2649 }
2650
2651 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2652 {
2653     uint64_t quotient;
2654     return floatx80_modrem(a, b, true, &quotient, status);
2655 }
2656
2657 /*
2658  * Float to Float conversions
2659  *
2660  * Returns the result of converting one float format to another. The
2661  * conversion is performed according to the IEC/IEEE Standard for
2662  * Binary Floating-Point Arithmetic.
2663  *
2664  * Usually this only needs to take care of raising invalid exceptions
2665  * and handling the conversion on NaNs.
2666  */
2667
2668 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2669 {
2670     switch (a->cls) {
2671     case float_class_snan:
2672         float_raise(float_flag_invalid_snan, s);
2673         /* fall through */
2674     case float_class_qnan:
2675         /*
2676          * There is no NaN in the destination format.  Raise Invalid
2677          * and return a zero with the sign of the input NaN.
2678          */
2679         float_raise(float_flag_invalid, s);
2680         a->cls = float_class_zero;
2681         break;
2682
2683     case float_class_inf:
2684         /*
2685          * There is no Inf in the destination format.  Raise Invalid
2686          * and return the maximum normal with the correct sign.
2687          */
2688         float_raise(float_flag_invalid, s);
2689         a->cls = float_class_normal;
2690         a->exp = float16_params_ahp.exp_max;
2691         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2692                                   float16_params_ahp.frac_size + 1);
2693         break;
2694
2695     case float_class_normal:
2696     case float_class_zero:
2697         break;
2698
2699     default:
2700         g_assert_not_reached();
2701     }
2702 }
2703
2704 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2705 {
2706     if (is_nan(a->cls)) {
2707         parts_return_nan(a, s);
2708     }
2709 }
2710
2711 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2712 {
2713     if (is_nan(a->cls)) {
2714         parts_return_nan(a, s);
2715     }
2716 }
2717
/*
 * Common front end for parts64_float_to_float() and
 * parts128_float_to_float().
 * NOTE(review): presumably PARTS_GENERIC_64_128 selects the 64- or
 * 128-bit variant from the type of P -- confirm against its definition.
 */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2720
2721 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2722                                         float_status *s)
2723 {
2724     a->cls = b->cls;
2725     a->sign = b->sign;
2726     a->exp = b->exp;
2727
2728     if (a->cls == float_class_normal) {
2729         frac_truncjam(a, b);
2730     } else if (is_nan(a->cls)) {
2731         /* Discard the low bits of the NaN. */
2732         a->frac = b->frac_hi;
2733         parts_return_nan(a, s);
2734     }
2735 }
2736
2737 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2738                                        float_status *s)
2739 {
2740     a->cls = b->cls;
2741     a->sign = b->sign;
2742     a->exp = b->exp;
2743     frac_widen(a, b);
2744
2745     if (is_nan(a->cls)) {
2746         parts_return_nan(a, s);
2747     }
2748 }
2749
2750 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2751 {
2752     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2753     FloatParts64 p;
2754
2755     float16a_unpack_canonical(&p, a, s, fmt16);
2756     parts_float_to_float(&p, s);
2757     return float32_round_pack_canonical(&p, s);
2758 }
2759
2760 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2761 {
2762     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2763     FloatParts64 p;
2764
2765     float16a_unpack_canonical(&p, a, s, fmt16);
2766     parts_float_to_float(&p, s);
2767     return float64_round_pack_canonical(&p, s);
2768 }
2769
2770 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2771 {
2772     FloatParts64 p;
2773     const FloatFmt *fmt;
2774
2775     float32_unpack_canonical(&p, a, s);
2776     if (ieee) {
2777         parts_float_to_float(&p, s);
2778         fmt = &float16_params;
2779     } else {
2780         parts_float_to_ahp(&p, s);
2781         fmt = &float16_params_ahp;
2782     }
2783     return float16a_round_pack_canonical(&p, s, fmt);
2784 }
2785
2786 static float64 QEMU_SOFTFLOAT_ATTR
2787 soft_float32_to_float64(float32 a, float_status *s)
2788 {
2789     FloatParts64 p;
2790
2791     float32_unpack_canonical(&p, a, s);
2792     parts_float_to_float(&p, s);
2793     return float64_round_pack_canonical(&p, s);
2794 }
2795
2796 float64 float32_to_float64(float32 a, float_status *s)
2797 {
2798     if (likely(float32_is_normal(a))) {
2799         /* Widening conversion can never produce inexact results.  */
2800         union_float32 uf;
2801         union_float64 ud;
2802         uf.s = a;
2803         ud.h = uf.h;
2804         return ud.s;
2805     } else if (float32_is_zero(a)) {
2806         return float64_set_sign(float64_zero, float32_is_neg(a));
2807     } else {
2808         return soft_float32_to_float64(a, s);
2809     }
2810 }
2811
2812 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2813 {
2814     FloatParts64 p;
2815     const FloatFmt *fmt;
2816
2817     float64_unpack_canonical(&p, a, s);
2818     if (ieee) {
2819         parts_float_to_float(&p, s);
2820         fmt = &float16_params;
2821     } else {
2822         parts_float_to_ahp(&p, s);
2823         fmt = &float16_params_ahp;
2824     }
2825     return float16a_round_pack_canonical(&p, s, fmt);
2826 }
2827
2828 float32 float64_to_float32(float64 a, float_status *s)
2829 {
2830     FloatParts64 p;
2831
2832     float64_unpack_canonical(&p, a, s);
2833     parts_float_to_float(&p, s);
2834     return float32_round_pack_canonical(&p, s);
2835 }
2836
2837 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2838 {
2839     FloatParts64 p;
2840
2841     bfloat16_unpack_canonical(&p, a, s);
2842     parts_float_to_float(&p, s);
2843     return float32_round_pack_canonical(&p, s);
2844 }
2845
2846 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2847 {
2848     FloatParts64 p;
2849
2850     bfloat16_unpack_canonical(&p, a, s);
2851     parts_float_to_float(&p, s);
2852     return float64_round_pack_canonical(&p, s);
2853 }
2854
2855 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2856 {
2857     FloatParts64 p;
2858
2859     float32_unpack_canonical(&p, a, s);
2860     parts_float_to_float(&p, s);
2861     return bfloat16_round_pack_canonical(&p, s);
2862 }
2863
2864 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2865 {
2866     FloatParts64 p;
2867
2868     float64_unpack_canonical(&p, a, s);
2869     parts_float_to_float(&p, s);
2870     return bfloat16_round_pack_canonical(&p, s);
2871 }
2872
2873 float32 float128_to_float32(float128 a, float_status *s)
2874 {
2875     FloatParts64 p64;
2876     FloatParts128 p128;
2877
2878     float128_unpack_canonical(&p128, a, s);
2879     parts_float_to_float_narrow(&p64, &p128, s);
2880     return float32_round_pack_canonical(&p64, s);
2881 }
2882
2883 float64 float128_to_float64(float128 a, float_status *s)
2884 {
2885     FloatParts64 p64;
2886     FloatParts128 p128;
2887
2888     float128_unpack_canonical(&p128, a, s);
2889     parts_float_to_float_narrow(&p64, &p128, s);
2890     return float64_round_pack_canonical(&p64, s);
2891 }
2892
2893 float128 float32_to_float128(float32 a, float_status *s)
2894 {
2895     FloatParts64 p64;
2896     FloatParts128 p128;
2897
2898     float32_unpack_canonical(&p64, a, s);
2899     parts_float_to_float_widen(&p128, &p64, s);
2900     return float128_round_pack_canonical(&p128, s);
2901 }
2902
2903 float128 float64_to_float128(float64 a, float_status *s)
2904 {
2905     FloatParts64 p64;
2906     FloatParts128 p128;
2907
2908     float64_unpack_canonical(&p64, a, s);
2909     parts_float_to_float_widen(&p128, &p64, s);
2910     return float128_round_pack_canonical(&p128, s);
2911 }
2912
2913 float32 floatx80_to_float32(floatx80 a, float_status *s)
2914 {
2915     FloatParts64 p64;
2916     FloatParts128 p128;
2917
2918     if (floatx80_unpack_canonical(&p128, a, s)) {
2919         parts_float_to_float_narrow(&p64, &p128, s);
2920     } else {
2921         parts_default_nan(&p64, s);
2922     }
2923     return float32_round_pack_canonical(&p64, s);
2924 }
2925
2926 float64 floatx80_to_float64(floatx80 a, float_status *s)
2927 {
2928     FloatParts64 p64;
2929     FloatParts128 p128;
2930
2931     if (floatx80_unpack_canonical(&p128, a, s)) {
2932         parts_float_to_float_narrow(&p64, &p128, s);
2933     } else {
2934         parts_default_nan(&p64, s);
2935     }
2936     return float64_round_pack_canonical(&p64, s);
2937 }
2938
2939 float128 floatx80_to_float128(floatx80 a, float_status *s)
2940 {
2941     FloatParts128 p;
2942
2943     if (floatx80_unpack_canonical(&p, a, s)) {
2944         parts_float_to_float(&p, s);
2945     } else {
2946         parts_default_nan(&p, s);
2947     }
2948     return float128_round_pack_canonical(&p, s);
2949 }
2950
2951 floatx80 float32_to_floatx80(float32 a, float_status *s)
2952 {
2953     FloatParts64 p64;
2954     FloatParts128 p128;
2955
2956     float32_unpack_canonical(&p64, a, s);
2957     parts_float_to_float_widen(&p128, &p64, s);
2958     return floatx80_round_pack_canonical(&p128, s);
2959 }
2960
2961 floatx80 float64_to_floatx80(float64 a, float_status *s)
2962 {
2963     FloatParts64 p64;
2964     FloatParts128 p128;
2965
2966     float64_unpack_canonical(&p64, a, s);
2967     parts_float_to_float_widen(&p128, &p64, s);
2968     return floatx80_round_pack_canonical(&p128, s);
2969 }
2970
2971 floatx80 float128_to_floatx80(float128 a, float_status *s)
2972 {
2973     FloatParts128 p;
2974
2975     float128_unpack_canonical(&p, a, s);
2976     parts_float_to_float(&p, s);
2977     return floatx80_round_pack_canonical(&p, s);
2978 }
2979
2980 /*
2981  * Round to integral value
2982  */
2983
2984 float16 float16_round_to_int(float16 a, float_status *s)
2985 {
2986     FloatParts64 p;
2987
2988     float16_unpack_canonical(&p, a, s);
2989     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2990     return float16_round_pack_canonical(&p, s);
2991 }
2992
2993 float32 float32_round_to_int(float32 a, float_status *s)
2994 {
2995     FloatParts64 p;
2996
2997     float32_unpack_canonical(&p, a, s);
2998     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2999     return float32_round_pack_canonical(&p, s);
3000 }
3001
3002 float64 float64_round_to_int(float64 a, float_status *s)
3003 {
3004     FloatParts64 p;
3005
3006     float64_unpack_canonical(&p, a, s);
3007     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3008     return float64_round_pack_canonical(&p, s);
3009 }
3010
3011 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3012 {
3013     FloatParts64 p;
3014
3015     bfloat16_unpack_canonical(&p, a, s);
3016     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3017     return bfloat16_round_pack_canonical(&p, s);
3018 }
3019
3020 float128 float128_round_to_int(float128 a, float_status *s)
3021 {
3022     FloatParts128 p;
3023
3024     float128_unpack_canonical(&p, a, s);
3025     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3026     return float128_round_pack_canonical(&p, s);
3027 }
3028
3029 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3030 {
3031     FloatParts128 p;
3032
3033     if (!floatx80_unpack_canonical(&p, a, status)) {
3034         return floatx80_default_nan(status);
3035     }
3036
3037     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3038                        &floatx80_params[status->floatx80_rounding_precision]);
3039     return floatx80_round_pack_canonical(&p, status);
3040 }
3041
3042 /*
3043  * Floating-point to signed integer conversions
3044  */
3045
3046 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3047                               float_status *s)
3048 {
3049     FloatParts64 p;
3050
3051     float16_unpack_canonical(&p, a, s);
3052     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3053 }
3054
3055 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3056                                 float_status *s)
3057 {
3058     FloatParts64 p;
3059
3060     float16_unpack_canonical(&p, a, s);
3061     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3062 }
3063
3064 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3065                                 float_status *s)
3066 {
3067     FloatParts64 p;
3068
3069     float16_unpack_canonical(&p, a, s);
3070     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3071 }
3072
3073 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3074                                 float_status *s)
3075 {
3076     FloatParts64 p;
3077
3078     float16_unpack_canonical(&p, a, s);
3079     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3080 }
3081
3082 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3083                                 float_status *s)
3084 {
3085     FloatParts64 p;
3086
3087     float32_unpack_canonical(&p, a, s);
3088     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3089 }
3090
3091 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3092                                 float_status *s)
3093 {
3094     FloatParts64 p;
3095
3096     float32_unpack_canonical(&p, a, s);
3097     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3098 }
3099
3100 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3101                                 float_status *s)
3102 {
3103     FloatParts64 p;
3104
3105     float32_unpack_canonical(&p, a, s);
3106     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3107 }
3108
3109 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3110                                 float_status *s)
3111 {
3112     FloatParts64 p;
3113
3114     float64_unpack_canonical(&p, a, s);
3115     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3116 }
3117
3118 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3119                                 float_status *s)
3120 {
3121     FloatParts64 p;
3122
3123     float64_unpack_canonical(&p, a, s);
3124     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3125 }
3126
3127 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3128                                 float_status *s)
3129 {
3130     FloatParts64 p;
3131
3132     float64_unpack_canonical(&p, a, s);
3133     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3134 }
3135
3136 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3137                                float_status *s)
3138 {
3139     FloatParts64 p;
3140
3141     bfloat16_unpack_canonical(&p, a, s);
3142     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3143 }
3144
3145 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3146                                  float_status *s)
3147 {
3148     FloatParts64 p;
3149
3150     bfloat16_unpack_canonical(&p, a, s);
3151     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3152 }
3153
3154 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3155                                  float_status *s)
3156 {
3157     FloatParts64 p;
3158
3159     bfloat16_unpack_canonical(&p, a, s);
3160     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3161 }
3162
3163 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3164                                  float_status *s)
3165 {
3166     FloatParts64 p;
3167
3168     bfloat16_unpack_canonical(&p, a, s);
3169     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3170 }
3171
3172 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3173                                         int scale, float_status *s)
3174 {
3175     FloatParts128 p;
3176
3177     float128_unpack_canonical(&p, a, s);
3178     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3179 }
3180
3181 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3182                                         int scale, float_status *s)
3183 {
3184     FloatParts128 p;
3185
3186     float128_unpack_canonical(&p, a, s);
3187     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3188 }
3189
3190 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3191                                         int scale, float_status *s)
3192 {
3193     int flags = 0;
3194     Int128 r;
3195     FloatParts128 p;
3196
3197     float128_unpack_canonical(&p, a, s);
3198
3199     switch (p.cls) {
3200     case float_class_snan:
3201         flags |= float_flag_invalid_snan;
3202         /* fall through */
3203     case float_class_qnan:
3204         flags |= float_flag_invalid;
3205         r = UINT128_MAX;
3206         break;
3207
3208     case float_class_inf:
3209         flags = float_flag_invalid | float_flag_invalid_cvti;
3210         r = p.sign ? INT128_MIN : INT128_MAX;
3211         break;
3212
3213     case float_class_zero:
3214         return int128_zero();
3215
3216     case float_class_normal:
3217         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3218             flags = float_flag_inexact;
3219         }
3220
3221         if (p.exp < 127) {
3222             int shift = 127 - p.exp;
3223             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3224             if (p.sign) {
3225                 r = int128_neg(r);
3226             }
3227         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3228                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3229             r = INT128_MIN;
3230         } else {
3231             flags = float_flag_invalid | float_flag_invalid_cvti;
3232             r = p.sign ? INT128_MIN : INT128_MAX;
3233         }
3234         break;
3235
3236     default:
3237         g_assert_not_reached();
3238     }
3239
3240     float_raise(flags, s);
3241     return r;
3242 }
3243
3244 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3245                                         int scale, float_status *s)
3246 {
3247     FloatParts128 p;
3248
3249     if (!floatx80_unpack_canonical(&p, a, s)) {
3250         parts_default_nan(&p, s);
3251     }
3252     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3253 }
3254
3255 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3256                                         int scale, float_status *s)
3257 {
3258     FloatParts128 p;
3259
3260     if (!floatx80_unpack_canonical(&p, a, s)) {
3261         parts_default_nan(&p, s);
3262     }
3263     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3264 }
3265
3266 int8_t float16_to_int8(float16 a, float_status *s)
3267 {
3268     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3269 }
3270
3271 int16_t float16_to_int16(float16 a, float_status *s)
3272 {
3273     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3274 }
3275
3276 int32_t float16_to_int32(float16 a, float_status *s)
3277 {
3278     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3279 }
3280
3281 int64_t float16_to_int64(float16 a, float_status *s)
3282 {
3283     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3284 }
3285
3286 int16_t float32_to_int16(float32 a, float_status *s)
3287 {
3288     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3289 }
3290
3291 int32_t float32_to_int32(float32 a, float_status *s)
3292 {
3293     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3294 }
3295
3296 int64_t float32_to_int64(float32 a, float_status *s)
3297 {
3298     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3299 }
3300
3301 int16_t float64_to_int16(float64 a, float_status *s)
3302 {
3303     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3304 }
3305
3306 int32_t float64_to_int32(float64 a, float_status *s)
3307 {
3308     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3309 }
3310
3311 int64_t float64_to_int64(float64 a, float_status *s)
3312 {
3313     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3314 }
3315
3316 int32_t float128_to_int32(float128 a, float_status *s)
3317 {
3318     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3319 }
3320
3321 int64_t float128_to_int64(float128 a, float_status *s)
3322 {
3323     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3324 }
3325
3326 Int128 float128_to_int128(float128 a, float_status *s)
3327 {
3328     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3329 }
3330
3331 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3332 {
3333     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3334 }
3335
3336 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3337 {
3338     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3339 }
3340
3341 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3342 {
3343     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3344 }
3345
3346 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3347 {
3348     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3349 }
3350
3351 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3352 {
3353     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3354 }
3355
3356 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3357 {
3358     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3359 }
3360
3361 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3362 {
3363     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3364 }
3365
3366 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3367 {
3368     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3369 }
3370
3371 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3372 {
3373     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3374 }
3375
3376 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3377 {
3378     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3379 }
3380
3381 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3382 {
3383     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3384 }
3385
3386 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3387 {
3388     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3389 }
3390
3391 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3392 {
3393     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3394 }
3395
3396 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3397 {
3398     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3399 }
3400
3401 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3402 {
3403     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3404 }
3405
3406 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3407 {
3408     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3409 }
3410
3411 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3412 {
3413     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3414 }
3415
3416 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3417 {
3418     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3419 }
3420
3421 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3422 {
3423     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3424 }
3425
3426 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3427 {
3428     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3429 }
3430
3431 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3432 {
3433     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3434 }
3435
3436 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3437 {
3438     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3439 }
3440
3441 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3442 {
3443     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3444 }
3445
3446 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3447 {
3448     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3449 }
3450
3451 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3452                                 float_status *s)
3453 {
3454     FloatParts64 p;
3455
3456     float64_unpack_canonical(&p, a, s);
3457     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3458 }
3459
3460 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3461                                 float_status *s)
3462 {
3463     FloatParts64 p;
3464
3465     float64_unpack_canonical(&p, a, s);
3466     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3467 }
3468
3469 /*
3470  * Floating-point to unsigned integer conversions
3471  */
3472
3473 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3474                                 float_status *s)
3475 {
3476     FloatParts64 p;
3477
3478     float16_unpack_canonical(&p, a, s);
3479     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3480 }
3481
3482 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3483                                   float_status *s)
3484 {
3485     FloatParts64 p;
3486
3487     float16_unpack_canonical(&p, a, s);
3488     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3489 }
3490
3491 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3492                                   float_status *s)
3493 {
3494     FloatParts64 p;
3495
3496     float16_unpack_canonical(&p, a, s);
3497     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3498 }
3499
3500 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3501                                   float_status *s)
3502 {
3503     FloatParts64 p;
3504
3505     float16_unpack_canonical(&p, a, s);
3506     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3507 }
3508
3509 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3510                                   float_status *s)
3511 {
3512     FloatParts64 p;
3513
3514     float32_unpack_canonical(&p, a, s);
3515     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3516 }
3517
3518 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3519                                   float_status *s)
3520 {
3521     FloatParts64 p;
3522
3523     float32_unpack_canonical(&p, a, s);
3524     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3525 }
3526
3527 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3528                                   float_status *s)
3529 {
3530     FloatParts64 p;
3531
3532     float32_unpack_canonical(&p, a, s);
3533     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3534 }
3535
3536 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3537                                   float_status *s)
3538 {
3539     FloatParts64 p;
3540
3541     float64_unpack_canonical(&p, a, s);
3542     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3543 }
3544
3545 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3546                                   float_status *s)
3547 {
3548     FloatParts64 p;
3549
3550     float64_unpack_canonical(&p, a, s);
3551     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3552 }
3553
3554 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3555                                   float_status *s)
3556 {
3557     FloatParts64 p;
3558
3559     float64_unpack_canonical(&p, a, s);
3560     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3561 }
3562
3563 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3564                                  int scale, float_status *s)
3565 {
3566     FloatParts64 p;
3567
3568     bfloat16_unpack_canonical(&p, a, s);
3569     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3570 }
3571
3572 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3573                                    int scale, float_status *s)
3574 {
3575     FloatParts64 p;
3576
3577     bfloat16_unpack_canonical(&p, a, s);
3578     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3579 }
3580
3581 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3582                                    int scale, float_status *s)
3583 {
3584     FloatParts64 p;
3585
3586     bfloat16_unpack_canonical(&p, a, s);
3587     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3588 }
3589
3590 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3591                                    int scale, float_status *s)
3592 {
3593     FloatParts64 p;
3594
3595     bfloat16_unpack_canonical(&p, a, s);
3596     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3597 }
3598
3599 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3600                                           int scale, float_status *s)
3601 {
3602     FloatParts128 p;
3603
3604     float128_unpack_canonical(&p, a, s);
3605     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3606 }
3607
3608 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3609                                           int scale, float_status *s)
3610 {
3611     FloatParts128 p;
3612
3613     float128_unpack_canonical(&p, a, s);
3614     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3615 }
3616
/*
 * Convert float128 @a to an unsigned 128-bit integer, returned in an
 * Int128.
 *
 * @rmode and @scale are forwarded to parts_round_to_int_normal().
 * The exception flags collected here are raised on @s before returning,
 * except for a zero input, which returns 0 without raising anything.
 *
 *  - NaN:  invalid (plus invalid_snan for a signalling NaN); the result
 *          is UINT128_MAX.
 *  - Inf:  invalid | invalid_cvti; 0 if negative, else UINT128_MAX.
 *  - Normal values are rounded first.  A value that rounds to zero
 *    yields 0 with inexact.  Any remaining negative value is
 *    invalid | invalid_cvti and yields 0; a value with exp <= 127 is
 *    shifted into place; anything larger is invalid | invalid_cvti and
 *    yields UINT128_MAX.
 */
static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
                                         int scale, float_status *s)
{
    int flags = 0;
    Int128 r;
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);

    switch (p.cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = UINT128_MAX;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p.sign ? int128_zero() : UINT128_MAX;
        break;

    case float_class_zero:
        /* Early return: float_raise() below is not reached for zero. */
        return int128_zero();

    case float_class_normal:
        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
            flags = float_flag_inexact;
            /* Rounding may have reclassified the value as zero. */
            if (p.cls == float_class_zero) {
                r = int128_zero();
                break;
            }
        }

        if (p.sign) {
            /* Overwrites (rather than adds to) any inexact flag. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = int128_zero();
        } else if (p.exp <= 127) {
            /* Shift the 128-bit fraction right by 127 - exp. */
            int shift = 127 - p.exp;
            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
        } else {
            /* Overwrites (rather than adds to) any inexact flag. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = UINT128_MAX;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
3671
3672 uint8_t float16_to_uint8(float16 a, float_status *s)
3673 {
3674     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3675 }
3676
3677 uint16_t float16_to_uint16(float16 a, float_status *s)
3678 {
3679     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3680 }
3681
3682 uint32_t float16_to_uint32(float16 a, float_status *s)
3683 {
3684     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3685 }
3686
3687 uint64_t float16_to_uint64(float16 a, float_status *s)
3688 {
3689     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3690 }
3691
3692 uint16_t float32_to_uint16(float32 a, float_status *s)
3693 {
3694     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3695 }
3696
3697 uint32_t float32_to_uint32(float32 a, float_status *s)
3698 {
3699     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3700 }
3701
3702 uint64_t float32_to_uint64(float32 a, float_status *s)
3703 {
3704     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3705 }
3706
3707 uint16_t float64_to_uint16(float64 a, float_status *s)
3708 {
3709     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3710 }
3711
3712 uint32_t float64_to_uint32(float64 a, float_status *s)
3713 {
3714     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3715 }
3716
3717 uint64_t float64_to_uint64(float64 a, float_status *s)
3718 {
3719     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3720 }
3721
3722 uint32_t float128_to_uint32(float128 a, float_status *s)
3723 {
3724     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3725 }
3726
3727 uint64_t float128_to_uint64(float128 a, float_status *s)
3728 {
3729     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3730 }
3731
3732 Int128 float128_to_uint128(float128 a, float_status *s)
3733 {
3734     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3735 }
3736
3737 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3738 {
3739     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3740 }
3741
3742 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3743 {
3744     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3745 }
3746
3747 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3748 {
3749     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3750 }
3751
3752 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3753 {
3754     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3755 }
3756
3757 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3758 {
3759     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3760 }
3761
3762 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3763 {
3764     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3765 }
3766
3767 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3768 {
3769     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3770 }
3771
3772 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3773 {
3774     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3775 }
3776
3777 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3778 {
3779     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3780 }
3781
3782 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3783 {
3784     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3785 }
3786
3787 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3788 {
3789     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3790 }
3791
3792 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3793 {
3794     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3795 }
3796
3797 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3798 {
3799     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3800 }
3801
3802 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3803 {
3804     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3805 }
3806
3807 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3808 {
3809     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3810 }
3811
3812 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3813 {
3814     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3815 }
3816
3817 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3818 {
3819     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3820 }
3821
3822 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3823 {
3824     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3825 }
3826
3827 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3828 {
3829     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3830 }
3831
3832 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3833 {
3834     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3835 }
3836
3837 /*
3838  * Signed integer to floating-point conversions
3839  */
3840
3841 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3842 {
3843     FloatParts64 p;
3844
3845     parts_sint_to_float(&p, a, scale, status);
3846     return float16_round_pack_canonical(&p, status);
3847 }
3848
3849 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3850 {
3851     return int64_to_float16_scalbn(a, scale, status);
3852 }
3853
3854 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3855 {
3856     return int64_to_float16_scalbn(a, scale, status);
3857 }
3858
3859 float16 int64_to_float16(int64_t a, float_status *status)
3860 {
3861     return int64_to_float16_scalbn(a, 0, status);
3862 }
3863
3864 float16 int32_to_float16(int32_t a, float_status *status)
3865 {
3866     return int64_to_float16_scalbn(a, 0, status);
3867 }
3868
3869 float16 int16_to_float16(int16_t a, float_status *status)
3870 {
3871     return int64_to_float16_scalbn(a, 0, status);
3872 }
3873
3874 float16 int8_to_float16(int8_t a, float_status *status)
3875 {
3876     return int64_to_float16_scalbn(a, 0, status);
3877 }
3878
3879 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3880 {
3881     FloatParts64 p;
3882
3883     /* Without scaling, there are no overflow concerns. */
3884     if (likely(scale == 0) && can_use_fpu(status)) {
3885         union_float32 ur;
3886         ur.h = a;
3887         return ur.s;
3888     }
3889
3890     parts64_sint_to_float(&p, a, scale, status);
3891     return float32_round_pack_canonical(&p, status);
3892 }
3893
3894 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3895 {
3896     return int64_to_float32_scalbn(a, scale, status);
3897 }
3898
3899 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3900 {
3901     return int64_to_float32_scalbn(a, scale, status);
3902 }
3903
3904 float32 int64_to_float32(int64_t a, float_status *status)
3905 {
3906     return int64_to_float32_scalbn(a, 0, status);
3907 }
3908
3909 float32 int32_to_float32(int32_t a, float_status *status)
3910 {
3911     return int64_to_float32_scalbn(a, 0, status);
3912 }
3913
3914 float32 int16_to_float32(int16_t a, float_status *status)
3915 {
3916     return int64_to_float32_scalbn(a, 0, status);
3917 }
3918
3919 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3920 {
3921     FloatParts64 p;
3922
3923     /* Without scaling, there are no overflow concerns. */
3924     if (likely(scale == 0) && can_use_fpu(status)) {
3925         union_float64 ur;
3926         ur.h = a;
3927         return ur.s;
3928     }
3929
3930     parts_sint_to_float(&p, a, scale, status);
3931     return float64_round_pack_canonical(&p, status);
3932 }
3933
3934 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3935 {
3936     return int64_to_float64_scalbn(a, scale, status);
3937 }
3938
3939 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3940 {
3941     return int64_to_float64_scalbn(a, scale, status);
3942 }
3943
3944 float64 int64_to_float64(int64_t a, float_status *status)
3945 {
3946     return int64_to_float64_scalbn(a, 0, status);
3947 }
3948
3949 float64 int32_to_float64(int32_t a, float_status *status)
3950 {
3951     return int64_to_float64_scalbn(a, 0, status);
3952 }
3953
3954 float64 int16_to_float64(int16_t a, float_status *status)
3955 {
3956     return int64_to_float64_scalbn(a, 0, status);
3957 }
3958
3959 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3960 {
3961     FloatParts64 p;
3962
3963     parts_sint_to_float(&p, a, scale, status);
3964     return bfloat16_round_pack_canonical(&p, status);
3965 }
3966
3967 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3968 {
3969     return int64_to_bfloat16_scalbn(a, scale, status);
3970 }
3971
3972 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3973 {
3974     return int64_to_bfloat16_scalbn(a, scale, status);
3975 }
3976
3977 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
3978 {
3979     return int64_to_bfloat16_scalbn(a, scale, status);
3980 }
3981
3982 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3983 {
3984     return int64_to_bfloat16_scalbn(a, 0, status);
3985 }
3986
3987 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3988 {
3989     return int64_to_bfloat16_scalbn(a, 0, status);
3990 }
3991
3992 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3993 {
3994     return int64_to_bfloat16_scalbn(a, 0, status);
3995 }
3996
3997 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
3998 {
3999     return int64_to_bfloat16_scalbn(a, 0, status);
4000 }
4001
4002 float128 int128_to_float128(Int128 a, float_status *status)
4003 {
4004     FloatParts128 p = { };
4005     int shift;
4006
4007     if (int128_nz(a)) {
4008         p.cls = float_class_normal;
4009         if (!int128_nonneg(a)) {
4010             p.sign = true;
4011             a = int128_neg(a);
4012         }
4013
4014         shift = clz64(int128_gethi(a));
4015         if (shift == 64) {
4016             shift += clz64(int128_getlo(a));
4017         }
4018
4019         p.exp = 127 - shift;
4020         a = int128_lshift(a, shift);
4021
4022         p.frac_hi = int128_gethi(a);
4023         p.frac_lo = int128_getlo(a);
4024     } else {
4025         p.cls = float_class_zero;
4026     }
4027
4028     return float128_round_pack_canonical(&p, status);
4029 }
4030
4031 float128 int64_to_float128(int64_t a, float_status *status)
4032 {
4033     FloatParts128 p;
4034
4035     parts_sint_to_float(&p, a, 0, status);
4036     return float128_round_pack_canonical(&p, status);
4037 }
4038
4039 float128 int32_to_float128(int32_t a, float_status *status)
4040 {
4041     return int64_to_float128(a, status);
4042 }
4043
4044 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4045 {
4046     FloatParts128 p;
4047
4048     parts_sint_to_float(&p, a, 0, status);
4049     return floatx80_round_pack_canonical(&p, status);
4050 }
4051
4052 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4053 {
4054     return int64_to_floatx80(a, status);
4055 }
4056
4057 /*
 * Unsigned integer to floating-point conversions
4059  */
4060
4061 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4062 {
4063     FloatParts64 p;
4064
4065     parts_uint_to_float(&p, a, scale, status);
4066     return float16_round_pack_canonical(&p, status);
4067 }
4068
4069 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4070 {
4071     return uint64_to_float16_scalbn(a, scale, status);
4072 }
4073
4074 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4075 {
4076     return uint64_to_float16_scalbn(a, scale, status);
4077 }
4078
4079 float16 uint64_to_float16(uint64_t a, float_status *status)
4080 {
4081     return uint64_to_float16_scalbn(a, 0, status);
4082 }
4083
4084 float16 uint32_to_float16(uint32_t a, float_status *status)
4085 {
4086     return uint64_to_float16_scalbn(a, 0, status);
4087 }
4088
4089 float16 uint16_to_float16(uint16_t a, float_status *status)
4090 {
4091     return uint64_to_float16_scalbn(a, 0, status);
4092 }
4093
4094 float16 uint8_to_float16(uint8_t a, float_status *status)
4095 {
4096     return uint64_to_float16_scalbn(a, 0, status);
4097 }
4098
4099 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4100 {
4101     FloatParts64 p;
4102
4103     /* Without scaling, there are no overflow concerns. */
4104     if (likely(scale == 0) && can_use_fpu(status)) {
4105         union_float32 ur;
4106         ur.h = a;
4107         return ur.s;
4108     }
4109
4110     parts_uint_to_float(&p, a, scale, status);
4111     return float32_round_pack_canonical(&p, status);
4112 }
4113
4114 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4115 {
4116     return uint64_to_float32_scalbn(a, scale, status);
4117 }
4118
4119 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4120 {
4121     return uint64_to_float32_scalbn(a, scale, status);
4122 }
4123
4124 float32 uint64_to_float32(uint64_t a, float_status *status)
4125 {
4126     return uint64_to_float32_scalbn(a, 0, status);
4127 }
4128
4129 float32 uint32_to_float32(uint32_t a, float_status *status)
4130 {
4131     return uint64_to_float32_scalbn(a, 0, status);
4132 }
4133
4134 float32 uint16_to_float32(uint16_t a, float_status *status)
4135 {
4136     return uint64_to_float32_scalbn(a, 0, status);
4137 }
4138
4139 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4140 {
4141     FloatParts64 p;
4142
4143     /* Without scaling, there are no overflow concerns. */
4144     if (likely(scale == 0) && can_use_fpu(status)) {
4145         union_float64 ur;
4146         ur.h = a;
4147         return ur.s;
4148     }
4149
4150     parts_uint_to_float(&p, a, scale, status);
4151     return float64_round_pack_canonical(&p, status);
4152 }
4153
4154 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4155 {
4156     return uint64_to_float64_scalbn(a, scale, status);
4157 }
4158
4159 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4160 {
4161     return uint64_to_float64_scalbn(a, scale, status);
4162 }
4163
4164 float64 uint64_to_float64(uint64_t a, float_status *status)
4165 {
4166     return uint64_to_float64_scalbn(a, 0, status);
4167 }
4168
4169 float64 uint32_to_float64(uint32_t a, float_status *status)
4170 {
4171     return uint64_to_float64_scalbn(a, 0, status);
4172 }
4173
4174 float64 uint16_to_float64(uint16_t a, float_status *status)
4175 {
4176     return uint64_to_float64_scalbn(a, 0, status);
4177 }
4178
4179 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4180 {
4181     FloatParts64 p;
4182
4183     parts_uint_to_float(&p, a, scale, status);
4184     return bfloat16_round_pack_canonical(&p, status);
4185 }
4186
4187 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4188 {
4189     return uint64_to_bfloat16_scalbn(a, scale, status);
4190 }
4191
4192 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4193 {
4194     return uint64_to_bfloat16_scalbn(a, scale, status);
4195 }
4196
4197 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4198 {
4199     return uint64_to_bfloat16_scalbn(a, scale, status);
4200 }
4201
4202 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4203 {
4204     return uint64_to_bfloat16_scalbn(a, 0, status);
4205 }
4206
4207 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4208 {
4209     return uint64_to_bfloat16_scalbn(a, 0, status);
4210 }
4211
4212 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4213 {
4214     return uint64_to_bfloat16_scalbn(a, 0, status);
4215 }
4216
4217 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4218 {
4219     return uint64_to_bfloat16_scalbn(a, 0, status);
4220 }
4221
4222 float128 uint64_to_float128(uint64_t a, float_status *status)
4223 {
4224     FloatParts128 p;
4225
4226     parts_uint_to_float(&p, a, 0, status);
4227     return float128_round_pack_canonical(&p, status);
4228 }
4229
4230 float128 uint128_to_float128(Int128 a, float_status *status)
4231 {
4232     FloatParts128 p = { };
4233     int shift;
4234
4235     if (int128_nz(a)) {
4236         p.cls = float_class_normal;
4237
4238         shift = clz64(int128_gethi(a));
4239         if (shift == 64) {
4240             shift += clz64(int128_getlo(a));
4241         }
4242
4243         p.exp = 127 - shift;
4244         a = int128_lshift(a, shift);
4245
4246         p.frac_hi = int128_gethi(a);
4247         p.frac_lo = int128_getlo(a);
4248     } else {
4249         p.cls = float_class_zero;
4250     }
4251
4252     return float128_round_pack_canonical(&p, status);
4253 }
4254
4255 /*
4256  * Minimum and maximum
4257  */
4258
4259 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4260 {
4261     FloatParts64 pa, pb, *pr;
4262
4263     float16_unpack_canonical(&pa, a, s);
4264     float16_unpack_canonical(&pb, b, s);
4265     pr = parts_minmax(&pa, &pb, s, flags);
4266
4267     return float16_round_pack_canonical(pr, s);
4268 }
4269
4270 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4271                                 float_status *s, int flags)
4272 {
4273     FloatParts64 pa, pb, *pr;
4274
4275     bfloat16_unpack_canonical(&pa, a, s);
4276     bfloat16_unpack_canonical(&pb, b, s);
4277     pr = parts_minmax(&pa, &pb, s, flags);
4278
4279     return bfloat16_round_pack_canonical(pr, s);
4280 }
4281
4282 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4283 {
4284     FloatParts64 pa, pb, *pr;
4285
4286     float32_unpack_canonical(&pa, a, s);
4287     float32_unpack_canonical(&pb, b, s);
4288     pr = parts_minmax(&pa, &pb, s, flags);
4289
4290     return float32_round_pack_canonical(pr, s);
4291 }
4292
4293 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4294 {
4295     FloatParts64 pa, pb, *pr;
4296
4297     float64_unpack_canonical(&pa, a, s);
4298     float64_unpack_canonical(&pb, b, s);
4299     pr = parts_minmax(&pa, &pb, s, flags);
4300
4301     return float64_round_pack_canonical(pr, s);
4302 }
4303
4304 static float128 float128_minmax(float128 a, float128 b,
4305                                 float_status *s, int flags)
4306 {
4307     FloatParts128 pa, pb, *pr;
4308
4309     float128_unpack_canonical(&pa, a, s);
4310     float128_unpack_canonical(&pb, b, s);
4311     pr = parts_minmax(&pa, &pb, s, flags);
4312
4313     return float128_round_pack_canonical(pr, s);
4314 }
4315
4316 #define MINMAX_1(type, name, flags) \
4317     type type##_##name(type a, type b, float_status *s) \
4318     { return type##_minmax(a, b, s, flags); }
4319
4320 #define MINMAX_2(type) \
4321     MINMAX_1(type, max, 0)                                                \
4322     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4323     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4324     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4325     MINMAX_1(type, min, minmax_ismin)                                     \
4326     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4327     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4328     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4329
4330 MINMAX_2(float16)
4331 MINMAX_2(bfloat16)
4332 MINMAX_2(float32)
4333 MINMAX_2(float64)
4334 MINMAX_2(float128)
4335
4336 #undef MINMAX_1
4337 #undef MINMAX_2
4338
4339 /*
4340  * Floating point compare
4341  */
4342
4343 static FloatRelation QEMU_FLATTEN
4344 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4345 {
4346     FloatParts64 pa, pb;
4347
4348     float16_unpack_canonical(&pa, a, s);
4349     float16_unpack_canonical(&pb, b, s);
4350     return parts_compare(&pa, &pb, s, is_quiet);
4351 }
4352
4353 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4354 {
4355     return float16_do_compare(a, b, s, false);
4356 }
4357
4358 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4359 {
4360     return float16_do_compare(a, b, s, true);
4361 }
4362
4363 static FloatRelation QEMU_SOFTFLOAT_ATTR
4364 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4365 {
4366     FloatParts64 pa, pb;
4367
4368     float32_unpack_canonical(&pa, a, s);
4369     float32_unpack_canonical(&pb, b, s);
4370     return parts_compare(&pa, &pb, s, is_quiet);
4371 }
4372
4373 static FloatRelation QEMU_FLATTEN
4374 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4375 {
4376     union_float32 ua, ub;
4377
4378     ua.s = xa;
4379     ub.s = xb;
4380
4381     if (QEMU_NO_HARDFLOAT) {
4382         goto soft;
4383     }
4384
4385     float32_input_flush2(&ua.s, &ub.s, s);
4386     if (isgreaterequal(ua.h, ub.h)) {
4387         if (isgreater(ua.h, ub.h)) {
4388             return float_relation_greater;
4389         }
4390         return float_relation_equal;
4391     }
4392     if (likely(isless(ua.h, ub.h))) {
4393         return float_relation_less;
4394     }
4395     /*
4396      * The only condition remaining is unordered.
4397      * Fall through to set flags.
4398      */
4399  soft:
4400     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4401 }
4402
4403 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4404 {
4405     return float32_hs_compare(a, b, s, false);
4406 }
4407
4408 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4409 {
4410     return float32_hs_compare(a, b, s, true);
4411 }
4412
4413 static FloatRelation QEMU_SOFTFLOAT_ATTR
4414 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4415 {
4416     FloatParts64 pa, pb;
4417
4418     float64_unpack_canonical(&pa, a, s);
4419     float64_unpack_canonical(&pb, b, s);
4420     return parts_compare(&pa, &pb, s, is_quiet);
4421 }
4422
4423 static FloatRelation QEMU_FLATTEN
4424 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4425 {
4426     union_float64 ua, ub;
4427
4428     ua.s = xa;
4429     ub.s = xb;
4430
4431     if (QEMU_NO_HARDFLOAT) {
4432         goto soft;
4433     }
4434
4435     float64_input_flush2(&ua.s, &ub.s, s);
4436     if (isgreaterequal(ua.h, ub.h)) {
4437         if (isgreater(ua.h, ub.h)) {
4438             return float_relation_greater;
4439         }
4440         return float_relation_equal;
4441     }
4442     if (likely(isless(ua.h, ub.h))) {
4443         return float_relation_less;
4444     }
4445     /*
4446      * The only condition remaining is unordered.
4447      * Fall through to set flags.
4448      */
4449  soft:
4450     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4451 }
4452
4453 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4454 {
4455     return float64_hs_compare(a, b, s, false);
4456 }
4457
4458 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4459 {
4460     return float64_hs_compare(a, b, s, true);
4461 }
4462
4463 static FloatRelation QEMU_FLATTEN
4464 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4465 {
4466     FloatParts64 pa, pb;
4467
4468     bfloat16_unpack_canonical(&pa, a, s);
4469     bfloat16_unpack_canonical(&pb, b, s);
4470     return parts_compare(&pa, &pb, s, is_quiet);
4471 }
4472
4473 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4474 {
4475     return bfloat16_do_compare(a, b, s, false);
4476 }
4477
4478 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4479 {
4480     return bfloat16_do_compare(a, b, s, true);
4481 }
4482
4483 static FloatRelation QEMU_FLATTEN
4484 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4485 {
4486     FloatParts128 pa, pb;
4487
4488     float128_unpack_canonical(&pa, a, s);
4489     float128_unpack_canonical(&pb, b, s);
4490     return parts_compare(&pa, &pb, s, is_quiet);
4491 }
4492
4493 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4494 {
4495     return float128_do_compare(a, b, s, false);
4496 }
4497
4498 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4499 {
4500     return float128_do_compare(a, b, s, true);
4501 }
4502
4503 static FloatRelation QEMU_FLATTEN
4504 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4505 {
4506     FloatParts128 pa, pb;
4507
4508     if (!floatx80_unpack_canonical(&pa, a, s) ||
4509         !floatx80_unpack_canonical(&pb, b, s)) {
4510         return float_relation_unordered;
4511     }
4512     return parts_compare(&pa, &pb, s, is_quiet);
4513 }
4514
4515 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4516 {
4517     return floatx80_do_compare(a, b, s, false);
4518 }
4519
4520 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4521 {
4522     return floatx80_do_compare(a, b, s, true);
4523 }
4524
4525 /*
4526  * Scale by 2**N
4527  */
4528
4529 float16 float16_scalbn(float16 a, int n, float_status *status)
4530 {
4531     FloatParts64 p;
4532
4533     float16_unpack_canonical(&p, a, status);
4534     parts_scalbn(&p, n, status);
4535     return float16_round_pack_canonical(&p, status);
4536 }
4537
4538 float32 float32_scalbn(float32 a, int n, float_status *status)
4539 {
4540     FloatParts64 p;
4541
4542     float32_unpack_canonical(&p, a, status);
4543     parts_scalbn(&p, n, status);
4544     return float32_round_pack_canonical(&p, status);
4545 }
4546
4547 float64 float64_scalbn(float64 a, int n, float_status *status)
4548 {
4549     FloatParts64 p;
4550
4551     float64_unpack_canonical(&p, a, status);
4552     parts_scalbn(&p, n, status);
4553     return float64_round_pack_canonical(&p, status);
4554 }
4555
4556 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4557 {
4558     FloatParts64 p;
4559
4560     bfloat16_unpack_canonical(&p, a, status);
4561     parts_scalbn(&p, n, status);
4562     return bfloat16_round_pack_canonical(&p, status);
4563 }
4564
4565 float128 float128_scalbn(float128 a, int n, float_status *status)
4566 {
4567     FloatParts128 p;
4568
4569     float128_unpack_canonical(&p, a, status);
4570     parts_scalbn(&p, n, status);
4571     return float128_round_pack_canonical(&p, status);
4572 }
4573
4574 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4575 {
4576     FloatParts128 p;
4577
4578     if (!floatx80_unpack_canonical(&p, a, status)) {
4579         return floatx80_default_nan(status);
4580     }
4581     parts_scalbn(&p, n, status);
4582     return floatx80_round_pack_canonical(&p, status);
4583 }
4584
4585 /*
4586  * Square Root
4587  */
4588
4589 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4590 {
4591     FloatParts64 p;
4592
4593     float16_unpack_canonical(&p, a, status);
4594     parts_sqrt(&p, status, &float16_params);
4595     return float16_round_pack_canonical(&p, status);
4596 }
4597
4598 static float32 QEMU_SOFTFLOAT_ATTR
4599 soft_f32_sqrt(float32 a, float_status *status)
4600 {
4601     FloatParts64 p;
4602
4603     float32_unpack_canonical(&p, a, status);
4604     parts_sqrt(&p, status, &float32_params);
4605     return float32_round_pack_canonical(&p, status);
4606 }
4607
4608 static float64 QEMU_SOFTFLOAT_ATTR
4609 soft_f64_sqrt(float64 a, float_status *status)
4610 {
4611     FloatParts64 p;
4612
4613     float64_unpack_canonical(&p, a, status);
4614     parts_sqrt(&p, status, &float64_params);
4615     return float64_round_pack_canonical(&p, status);
4616 }
4617
4618 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4619 {
4620     union_float32 ua, ur;
4621
4622     ua.s = xa;
4623     if (unlikely(!can_use_fpu(s))) {
4624         goto soft;
4625     }
4626
4627     float32_input_flush1(&ua.s, s);
4628     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4629         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4630                        fpclassify(ua.h) == FP_ZERO) ||
4631                      signbit(ua.h))) {
4632             goto soft;
4633         }
4634     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4635                         float32_is_neg(ua.s))) {
4636         goto soft;
4637     }
4638     ur.h = sqrtf(ua.h);
4639     return ur.s;
4640
4641  soft:
4642     return soft_f32_sqrt(ua.s, s);
4643 }
4644
4645 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4646 {
4647     union_float64 ua, ur;
4648
4649     ua.s = xa;
4650     if (unlikely(!can_use_fpu(s))) {
4651         goto soft;
4652     }
4653
4654     float64_input_flush1(&ua.s, s);
4655     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4656         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4657                        fpclassify(ua.h) == FP_ZERO) ||
4658                      signbit(ua.h))) {
4659             goto soft;
4660         }
4661     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4662                         float64_is_neg(ua.s))) {
4663         goto soft;
4664     }
4665     ur.h = sqrt(ua.h);
4666     return ur.s;
4667
4668  soft:
4669     return soft_f64_sqrt(ua.s, s);
4670 }
4671
4672 float64 float64r32_sqrt(float64 a, float_status *status)
4673 {
4674     FloatParts64 p;
4675
4676     float64_unpack_canonical(&p, a, status);
4677     parts_sqrt(&p, status, &float64_params);
4678     return float64r32_round_pack_canonical(&p, status);
4679 }
4680
4681 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4682 {
4683     FloatParts64 p;
4684
4685     bfloat16_unpack_canonical(&p, a, status);
4686     parts_sqrt(&p, status, &bfloat16_params);
4687     return bfloat16_round_pack_canonical(&p, status);
4688 }
4689
4690 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4691 {
4692     FloatParts128 p;
4693
4694     float128_unpack_canonical(&p, a, status);
4695     parts_sqrt(&p, status, &float128_params);
4696     return float128_round_pack_canonical(&p, status);
4697 }
4698
4699 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4700 {
4701     FloatParts128 p;
4702
4703     if (!floatx80_unpack_canonical(&p, a, s)) {
4704         return floatx80_default_nan(s);
4705     }
4706     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4707     return floatx80_round_pack_canonical(&p, s);
4708 }
4709
4710 /*
4711  * log2
4712  */
4713 float32 float32_log2(float32 a, float_status *status)
4714 {
4715     FloatParts64 p;
4716
4717     float32_unpack_canonical(&p, a, status);
4718     parts_log2(&p, status, &float32_params);
4719     return float32_round_pack_canonical(&p, status);
4720 }
4721
4722 float64 float64_log2(float64 a, float_status *status)
4723 {
4724     FloatParts64 p;
4725
4726     float64_unpack_canonical(&p, a, status);
4727     parts_log2(&p, status, &float64_params);
4728     return float64_round_pack_canonical(&p, status);
4729 }
4730
4731 /*----------------------------------------------------------------------------
4732 | The pattern for a default generated NaN.
4733 *----------------------------------------------------------------------------*/
4734
4735 float16 float16_default_nan(float_status *status)
4736 {
4737     FloatParts64 p;
4738
4739     parts_default_nan(&p, status);
4740     p.frac >>= float16_params.frac_shift;
4741     return float16_pack_raw(&p);
4742 }
4743
4744 float32 float32_default_nan(float_status *status)
4745 {
4746     FloatParts64 p;
4747
4748     parts_default_nan(&p, status);
4749     p.frac >>= float32_params.frac_shift;
4750     return float32_pack_raw(&p);
4751 }
4752
4753 float64 float64_default_nan(float_status *status)
4754 {
4755     FloatParts64 p;
4756
4757     parts_default_nan(&p, status);
4758     p.frac >>= float64_params.frac_shift;
4759     return float64_pack_raw(&p);
4760 }
4761
4762 float128 float128_default_nan(float_status *status)
4763 {
4764     FloatParts128 p;
4765
4766     parts_default_nan(&p, status);
4767     frac_shr(&p, float128_params.frac_shift);
4768     return float128_pack_raw(&p);
4769 }
4770
4771 bfloat16 bfloat16_default_nan(float_status *status)
4772 {
4773     FloatParts64 p;
4774
4775     parts_default_nan(&p, status);
4776     p.frac >>= bfloat16_params.frac_shift;
4777     return bfloat16_pack_raw(&p);
4778 }
4779
4780 /*----------------------------------------------------------------------------
4781 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4782 *----------------------------------------------------------------------------*/
4783
4784 float16 float16_silence_nan(float16 a, float_status *status)
4785 {
4786     FloatParts64 p;
4787
4788     float16_unpack_raw(&p, a);
4789     p.frac <<= float16_params.frac_shift;
4790     parts_silence_nan(&p, status);
4791     p.frac >>= float16_params.frac_shift;
4792     return float16_pack_raw(&p);
4793 }
4794
4795 float32 float32_silence_nan(float32 a, float_status *status)
4796 {
4797     FloatParts64 p;
4798
4799     float32_unpack_raw(&p, a);
4800     p.frac <<= float32_params.frac_shift;
4801     parts_silence_nan(&p, status);
4802     p.frac >>= float32_params.frac_shift;
4803     return float32_pack_raw(&p);
4804 }
4805
4806 float64 float64_silence_nan(float64 a, float_status *status)
4807 {
4808     FloatParts64 p;
4809
4810     float64_unpack_raw(&p, a);
4811     p.frac <<= float64_params.frac_shift;
4812     parts_silence_nan(&p, status);
4813     p.frac >>= float64_params.frac_shift;
4814     return float64_pack_raw(&p);
4815 }
4816
4817 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4818 {
4819     FloatParts64 p;
4820
4821     bfloat16_unpack_raw(&p, a);
4822     p.frac <<= bfloat16_params.frac_shift;
4823     parts_silence_nan(&p, status);
4824     p.frac >>= bfloat16_params.frac_shift;
4825     return bfloat16_pack_raw(&p);
4826 }
4827
4828 float128 float128_silence_nan(float128 a, float_status *status)
4829 {
4830     FloatParts128 p;
4831
4832     float128_unpack_raw(&p, a);
4833     frac_shl(&p, float128_params.frac_shift);
4834     parts_silence_nan(&p, status);
4835     frac_shr(&p, float128_params.frac_shift);
4836     return float128_pack_raw(&p);
4837 }
4838
4839 /*----------------------------------------------------------------------------
4840 | If `a' is denormal and we are in flush-to-zero mode then set the
4841 | input-denormal exception and return zero. Otherwise just return the value.
4842 *----------------------------------------------------------------------------*/
4843
4844 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4845 {
4846     if (p.exp == 0 && p.frac != 0) {
4847         float_raise(float_flag_input_denormal, status);
4848         return true;
4849     }
4850
4851     return false;
4852 }
4853
4854 float16 float16_squash_input_denormal(float16 a, float_status *status)
4855 {
4856     if (status->flush_inputs_to_zero) {
4857         FloatParts64 p;
4858
4859         float16_unpack_raw(&p, a);
4860         if (parts_squash_denormal(p, status)) {
4861             return float16_set_sign(float16_zero, p.sign);
4862         }
4863     }
4864     return a;
4865 }
4866
4867 float32 float32_squash_input_denormal(float32 a, float_status *status)
4868 {
4869     if (status->flush_inputs_to_zero) {
4870         FloatParts64 p;
4871
4872         float32_unpack_raw(&p, a);
4873         if (parts_squash_denormal(p, status)) {
4874             return float32_set_sign(float32_zero, p.sign);
4875         }
4876     }
4877     return a;
4878 }
4879
4880 float64 float64_squash_input_denormal(float64 a, float_status *status)
4881 {
4882     if (status->flush_inputs_to_zero) {
4883         FloatParts64 p;
4884
4885         float64_unpack_raw(&p, a);
4886         if (parts_squash_denormal(p, status)) {
4887             return float64_set_sign(float64_zero, p.sign);
4888         }
4889     }
4890     return a;
4891 }
4892
4893 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4894 {
4895     if (status->flush_inputs_to_zero) {
4896         FloatParts64 p;
4897
4898         bfloat16_unpack_raw(&p, a);
4899         if (parts_squash_denormal(p, status)) {
4900             return bfloat16_set_sign(bfloat16_zero, p.sign);
4901         }
4902     }
4903     return a;
4904 }
4905
4906 /*----------------------------------------------------------------------------
4907 | Normalizes the subnormal extended double-precision floating-point value
4908 | represented by the denormalized significand `aSig'.  The normalized exponent
4909 | and significand are stored at the locations pointed to by `zExpPtr' and
4910 | `zSigPtr', respectively.
4911 *----------------------------------------------------------------------------*/
4912
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * Number of leading zero bits in the significand.
     * NOTE(review): presumably callers never pass aSig == 0; a count of 64
     * would make the shift below undefined -- confirm against callers.
     */
    int8_t leadingZeros = clz64(aSig);

    /* Move the most significant set bit up to bit 63 ... */
    *zSigPtr = aSig << leadingZeros;
    /* ... and lower the exponent, starting from 1, by the same amount. */
    *zExpPtr = 1 - leadingZeros;
}
4922
4923 /*----------------------------------------------------------------------------
4924 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4925 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4926 | and returns the proper extended double-precision floating-point value
4927 | corresponding to the abstract input.  Ordinarily, the abstract value is
4928 | rounded and packed into the extended double-precision format, with the
4929 | inexact exception raised if the abstract input cannot be represented
4930 | exactly.  However, if the abstract value is too large, the overflow and
4931 | inexact exceptions are raised and an infinity or maximal finite value is
4932 | returned.  If the abstract value is too small, the input value is rounded to
4933 | a subnormal number, and the underflow and inexact exceptions are raised if
4934 | the abstract input cannot be represented exactly as a subnormal extended
4935 | double-precision floating-point number.
4936 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4937 | the result is rounded to the same number of bits as single or double
4938 | precision, respectively.  Otherwise, the result is rounded to the full
4939 | precision of the extended double-precision format.
4940 |     The input significand must be normalized or smaller.  If the input
4941 | significand is not normalized, `zExp' must be 0; in that case, the result
4942 | returned is a subnormal number, and it must not require rounding.  The
4943 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4944 | Floating-Point Arithmetic.
4945 *----------------------------------------------------------------------------*/
4946
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    /*
     * Two independent code paths follow:
     *  - reduced precision (floatx80_precision_d / _s): rounding is done
     *    inside zSig0 using roundMask/roundIncrement/roundBits;
     *  - full precision (label precision80): zSig0 is kept whole and zSig1
     *    decides whether zSig0 is incremented.
     * Both share the `overflow' label.
     */
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * For the reduced precisions roundMask covers the low bits of zSig0
     * that get discarded, and roundIncrement starts out as the top bit of
     * that mask (half of the lowest kept bit).
     */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /* Collapse zSig1 into a sticky bit in the lowest bit of zSig0. */
    zSig0 |= ( zSig1 != 0 );
    /*
     * Adapt the increment to the rounding mode: the nearest modes keep the
     * half-bit, truncation adds nothing, and the directed modes add the
     * whole mask when rounding away from zero for this sign.  Any other
     * rounding mode aborts.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /*
     * The unsigned comparison is true exactly when zExp <= 0 or
     * zExp >= 0x7FFE, i.e. when overflow or a subnormal result is possible.
     */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        /*
         * Overflow: exponent too large, or exponent 0x7FFE with the
         * rounding addition wrapping around 64 bits.
         */
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            /* With flush_to_zero set, return a signed zero right away. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /*
             * Tiny unless tininess is detected after rounding, zExp is
             * exactly 0 and the rounding addition wraps around 64 bits.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            /*
             * Denormalize by 1 - zExp bits (presumably keeping lost bits as
             * a sticky bit -- see shift64RightJamming), then redo roundBits
             * on the shifted value.
             */
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            /* Underflow is only raised when the result is also inexact. */
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding set bit 63: use exponent 1 instead of 0. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            /*
             * roundIncrement now holds the lowest kept bit.  On an exact
             * tie in nearest-even mode that bit is cleared as well, making
             * the result even.
             */
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    /* Reduced precision, exponent in the normal range. */
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* The addition wrapped: bump the exponent and reset the significand. */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    /* Same tie-to-even handling as in the subnormal case above. */
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full precision: decide from zSig1 whether zSig0 is incremented.  The
     * nearest modes look at the top bit of zSig1 (at least one half); the
     * directed modes increment for any non-zero zSig1 when rounding away
     * from zero for this sign.  Any other rounding mode aborts.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    /* Same range test as above: true when zExp <= 0 or zExp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            /* Makes ~roundMask all ones for the largest-finite result. */
            roundMask = 0;
            /*
             * Also reached by goto from the reduced-precision path, where
             * roundMask still holds that precision's discarded bits.
             */
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * When the rounding mode rounds toward zero for this sign,
             * return the largest finite value; otherwise return infinity.
             */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            /*
             * NOTE(review): unlike the reduced-precision path, this path
             * does not consult status->flush_to_zero -- confirm intended.
             *
             * Tiny unless tininess is detected after rounding, zExp is
             * exactly 0 and incrementing an all-ones zSig0 would carry out.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            /*
             * Denormalize the 128-bit significand by 1 - zExp bits
             * (presumably with sticky jamming -- see
             * shift64ExtraRightJamming).
             */
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            /* Underflow is only raised when the result is also inexact. */
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the increment from the shifted zSig1. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /*
                 * Only the top bit of zSig1 is set (an exact tie): in
                 * nearest-even mode clear bit 0 to make the result even.
                 */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Rounding set bit 63: use exponent 1 instead of 0. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    /* Full precision, exponent in the normal range. */
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        /* The increment wrapped: bump exponent, reset the significand. */
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie in nearest-even mode: force the result even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        /* A zero significand is packed with a zero exponent. */
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
5136
5137 /*----------------------------------------------------------------------------
5138 | Takes an abstract floating-point value having sign `zSign', exponent
5139 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5140 | and returns the proper extended double-precision floating-point value
5141 | corresponding to the abstract input.  This routine is just like
5142 | `roundAndPackFloatx80' except that the input significand does not have to be
5143 | normalized.
5144 *----------------------------------------------------------------------------*/
5145
5146 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5147                                        bool zSign, int32_t zExp,
5148                                        uint64_t zSig0, uint64_t zSig1,
5149                                        float_status *status)
5150 {
5151     int8_t shiftCount;
5152
5153     if ( zSig0 == 0 ) {
5154         zSig0 = zSig1;
5155         zSig1 = 0;
5156         zExp -= 64;
5157     }
5158     shiftCount = clz64(zSig0);
5159     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5160     zExp -= shiftCount;
5161     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5162                                 zSig0, zSig1, status);
5163
5164 }
5165
5166 /*----------------------------------------------------------------------------
5167 | Returns the binary exponential of the single-precision floating-point value
5168 | `a'. The operation is performed according to the IEC/IEEE Standard for
5169 | Binary Floating-Point Arithmetic.
5170 |
5171 | Uses the following identities:
5172 |
5173 | 1. -------------------------------------------------------------------------
5174 |      x    x*ln(2)
5175 |     2  = e
5176 |
5177 | 2. -------------------------------------------------------------------------
5178 |                      2     3     4     5           n
5179 |      x        x     x     x     x     x           x
5180 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5181 |               1!    2!    3!    4!    5!          n!
5182 *----------------------------------------------------------------------------*/
5183
/*
 * Series coefficients 1/n! for n = 1..15, given as float64 bit patterns.
 * Entry [n - 1] is the factor applied to x^n by float32_exp2().
 */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /* 1/1!  */
    const_float64( 0x3fe0000000000000ll ), /* 1/2!  */
    const_float64( 0x3fc5555555555555ll ), /* 1/3!  */
    const_float64( 0x3fa5555555555555ll ), /* 1/4!  */
    const_float64( 0x3f81111111111111ll ), /* 1/5!  */
    const_float64( 0x3f56c16c16c16c17ll ), /* 1/6!  */
    const_float64( 0x3f2a01a01a01a01all ), /* 1/7!  */
    const_float64( 0x3efa01a01a01a01all ), /* 1/8!  */
    const_float64( 0x3ec71de3a556c734ll ), /* 1/9!  */
    const_float64( 0x3e927e4fb7789f5cll ), /* 1/10! */
    const_float64( 0x3e5ae64567f544e4ll ), /* 1/11! */
    const_float64( 0x3e21eed8eff8d898ll ), /* 1/12! */
    const_float64( 0x3de6124613a86d09ll ), /* 1/13! */
    const_float64( 0x3da93974a8c07c9dll ), /* 1/14! */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 1/15! */
};
5202
5203 float32 float32_exp2(float32 a, float_status *status)
5204 {
5205     FloatParts64 xp, xnp, tp, rp;
5206     int i;
5207
5208     float32_unpack_canonical(&xp, a, status);
5209     if (unlikely(xp.cls != float_class_normal)) {
5210         switch (xp.cls) {
5211         case float_class_snan:
5212         case float_class_qnan:
5213             parts_return_nan(&xp, status);
5214             return float32_round_pack_canonical(&xp, status);
5215         case float_class_inf:
5216             return xp.sign ? float32_zero : a;
5217         case float_class_zero:
5218             return float32_one;
5219         default:
5220             break;
5221         }
5222         g_assert_not_reached();
5223     }
5224
5225     float_raise(float_flag_inexact, status);
5226
5227     float64_unpack_canonical(&tp, float64_ln2, status);
5228     xp = *parts_mul(&xp, &tp, status);
5229     xnp = xp;
5230
5231     float64_unpack_canonical(&rp, float64_one, status);
5232     for (i = 0 ; i < 15 ; i++) {
5233         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5234         rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
5235         xnp = *parts_mul(&xnp, &xp, status);
5236     }
5237
5238     return float32_round_pack_canonical(&rp, status);
5239 }
5240
5241 /*----------------------------------------------------------------------------
5242 | Rounds the extended double-precision floating-point value `a'
5243 | to the precision provided by floatx80_rounding_precision and returns the
5244 | result as an extended double-precision floating-point value.
5245 | The operation is performed according to the IEC/IEEE Standard for Binary
5246 | Floating-Point Arithmetic.
5247 *----------------------------------------------------------------------------*/
5248
5249 floatx80 floatx80_round(floatx80 a, float_status *status)
5250 {
5251     FloatParts128 p;
5252
5253     if (!floatx80_unpack_canonical(&p, a, status)) {
5254         return floatx80_default_nan(status);
5255     }
5256     return floatx80_round_pack_canonical(&p, status);
5257 }
5258
5259 static void __attribute__((constructor)) softfloat_init(void)
5260 {
5261     union_float64 ua, ub, uc, ur;
5262
5263     if (QEMU_NO_HARDFLOAT) {
5264         return;
5265     }
5266     /*
5267      * Test that the host's FMA is not obviously broken. For example,
5268      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5269      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5270      */
5271     ua.s = 0x0020000000000001ULL;
5272     ub.s = 0x3ca0000000000000ULL;
5273     uc.s = 0x0020000000000000ULL;
5274     ur.h = fma(ua.h, ub.h, uc.h);
5275     if (ur.s != 0x0020000000000001ULL) {
5276         force_soft_fma = true;
5277     }
5278 }