/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */
/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/
/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */
/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"
/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
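/*
 * In outline (a condensed sketch of float32_gen2()/float64_gen2() below,
 * not an additional code path), each hardfloat fast path reads:
 *
 *     if (unlikely(!can_use_fpu(s)) || unlikely(!pre(ua, ub))) {
 *         return soft(ua.s, ub.s, s);     // softfloat fallback
 *     }
 *     ur.h = hard(ua.h, ub.h);            // native host FP operation
 *     // overflow raises the flag; a tiny result defers back to softfloat
 */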
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                             \
    static inline void name(soft_t *a, soft_t *b, float_status *s) \
    {                                                              \
        if (likely(!s->flush_inputs_to_zero)) {                    \
            return;                                                \
        }                                                          \
        soft_t ## _input_flush__nocheck(a, s);                     \
        soft_t ## _input_flush__nocheck(b, s);                     \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                        \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                         \
        if (likely(!s->flush_inputs_to_zero)) {                               \
            return;                                                           \
        }                                                                     \
        soft_t ## _input_flush__nocheck(a, s);                                \
        soft_t ## _input_flush__nocheck(b, s);                                \
        soft_t ## _input_flush__nocheck(c, s);                                \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
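/*
 * For example, the 754-2008 minNum() operation corresponds to the flag
 * combination (minmax_ismin | minmax_isnum), and maxNumMag() to
 * (minmax_isnum | minmax_ismag) with minmax_ismin clear.
 */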
/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT  63
#define DECOMPOSED_IMPLICIT_BIT  (1ull << DECOMPOSED_BINARY_POINT)
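/*
 * Worked example: after canonicalization, the value 1.0 in any format has
 * cls = float_class_normal, sign = 0, exp = 0 and
 * frac = DECOMPOSED_IMPLICIT_BIT, i.e. the fraction 0x8000000000000000
 * read as 1.0 with the binary point at bit 63.
 */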
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                      \
    .exp_size   = E,                          \
    .exp_bias   = ((1 << E) - 1) >> 1,        \
    .exp_max    = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                    \
    FLOAT_PARAMS_(E),                         \
    .frac_size  = F,                          \
    .frac_shift = (-F - 1) & 63,              \
    .round_mask = (1ull << ((-F - 1) & 63)) - 1
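/*
 * Worked example: FLOAT_PARAMS(8, 23), used for float32 below, expands to
 * exp_size = 8, exp_bias = 127, exp_max = 255, frac_size = 23,
 * frac_shift = (-24 & 63) = 40, and round_mask = (1ull << 40) - 1:
 * the 23-bit fraction is shifted up to sit just below the binary point
 * at bit 63, and the 40 bits below it participate in rounding.
 */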
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};
/* Unpack a float to parts, but do not canonicalize. */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
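/*
 * Worked example: unpacking the float32 bit pattern 0x3f800000 (1.0f)
 * with float32_params yields sign = 0, exp = 0x7f (still biased) and
 * frac = 0; canonicalization later unbiases the exponent and makes the
 * implicit bit explicit.
 */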
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}
/* Pack a float from parts, but do not canonicalize. */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}
/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"
#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)
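/*
 * These use C11 _Generic to dispatch on the static type of the parts
 * pointer. For example, with FloatParts64 *p, parts_canonicalize(p, s, fmt)
 * selects parts64_canonicalize at compile time, while a FloatParts128 *
 * selects parts128_canonicalize; there is no runtime cost.
 */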
#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}

static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
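/*
 * Worked example: for 1.5 / 1.0 the canonical fractions are
 * a->frac = 0xC000000000000000 and b->frac = 0x8000000000000000.
 * Since a->frac >= b->frac, the numerator becomes
 * n0:n1 = 0x6000000000000000:0 and udiv_qrnnd yields
 * q = 0xC000000000000000 with remainder 0, i.e. exactly 1.5 with the
 * exponent unchanged (the function returns false).
 */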
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
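/*
 * "shrjam" is a shift right that jams (ORs) the shifted-out bits into the
 * least significant bit as a sticky bit, preserving inexactness for
 * rounding. For example, shifting frac = 0x13 right by 2 with frac64_shrjam
 * gives (0x13 >> 2) | 1 = 0x5, because the two discarded bits were nonzero;
 * a shift of 64 or more collapses the whole fraction to 0 or 1.
 */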
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)
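/*
 * For example, while N is defined to 64 and W to 128 below, the template
 * sources expand partsN(canonicalize) to parts64_canonicalize and
 * FloatPartsW to FloatParts128; re-including them with N = 128 and W = 256
 * generates the 128-bit variants from the same source.
 */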
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 256

#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
/*
 * Pack/unpack routines with a specific FloatFmt.
 */

static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
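/*
 * Together these give every operation below the same three-step shape:
 * unpack the operands to canonical parts, run the parts_* operation,
 * then round and repack, e.g. for a two-operand float32 op:
 *
 *     FloatParts64 pa, pb, *pr;
 *     float32_unpack_canonical(&pa, a, status);
 *     float32_unpack_canonical(&pb, b, status);
 *     pr = parts_addsub(&pa, &pb, status, subtract);
 *     return float32_round_pack_canonical(pr, status);
 */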
/* Returns false if the encoding is invalid. */
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
                                      float_status *s)
{
    /* Ensure rounding precision is set before beginning. */
    switch (s->floatx80_rounding_precision) {
    case floatx80_precision_x:
    case floatx80_precision_d:
    case floatx80_precision_s:
        break;
    default:
        g_assert_not_reached();
    }

    if (unlikely(floatx80_invalid_encoding(f))) {
        float_raise(float_flag_invalid, s);
        return false;
    }

    floatx80_unpack_raw(p, f);

    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
    } else {
        /* The explicit integer bit is ignored, after invalid checks. */
        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
        p->cls = (p->frac_hi == 0 ? float_class_inf
                  : parts_is_snan_frac(p->frac_hi, s)
                  ? float_class_snan : float_class_qnan);
    }
    return true;
}

static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
    uint64_t frac;
    int exp;

    switch (p->cls) {
    case float_class_normal:
        if (s->floatx80_rounding_precision == floatx80_precision_x) {
            parts_uncanon_normal(p, s, fmt);
            frac = p->frac_hi;
            exp = p->exp;
        } else {
            FloatParts64 p64;

            p64.sign = p->sign;
            p64.exp = p->exp;
            frac_truncjam(&p64, p);
            parts_uncanon_normal(&p64, s, fmt);
            frac = p64.frac;
            exp = p64.exp;
        }
        if (exp != fmt->exp_max) {
            break;
        }
        /* rounded to inf -- fall through to set frac correctly */

    case float_class_inf:
        /* x86 and m68k differ in the setting of the integer bit. */
        frac = floatx80_infinity_low;
        exp = fmt->exp_max;
        break;

    case float_class_zero:
        frac = 0;
        exp = 0;
        break;

    case float_class_snan:
    case float_class_qnan:
        /* NaNs have the integer bit set. */
        frac = p->frac_hi | (1ull << 63);
        exp = fmt->exp_max;
        break;

    default:
        g_assert_not_reached();
    }

    return packFloatx80(p->sign, exp, frac);
}
/*
 * Addition and subtraction
 */

static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}

float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}

float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}

static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}

static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}

static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}

static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}

static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}

static bool f32_addsubmul_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    }
    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
}

static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    } else {
        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
    }
}

static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}

static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}

bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}

bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}

static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}

float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}

float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}

static floatx80 QEMU_FLATTEN
floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_addsub(&pa, &pb, status, subtract);
    return floatx80_round_pack_canonical(pr, status);
}

floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, false);
}

floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, true);
}
/*
 * Multiplication
 */

float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}

float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}

bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}

floatx80 QEMU_FLATTEN
floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_mul(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}
/*
 * Fused multiply-add
 */

float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}

static bool force_soft_fma;
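
/*
 * The hardfloat muladd fast paths below rely on can_use_fpu(): they
 * are only taken when float_flag_inexact is already set (the flag is
 * sticky) and the rounding mode is nearest-even, so the host fma()
 * result needs no flag bookkeeping beyond the checks made here.
 * Results whose magnitude is at or below FLT_MIN are recomputed in
 * softfloat, since they may be subnormal and require underflow
 * handling the host cannot report; note that the float64 path also
 * tests against FLT_MIN, which is far larger than DBL_MIN and hence
 * conservative, but still correct.
 */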
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}

bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                      int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
                                      int flags, float_status *status)
{
    FloatParts128 pa, pb, pc, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float128_round_pack_canonical(pr, status);
}
/*
 * Division
 */

float16 float16_div(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

static float hard_f32_div(float a, float b)
{
    return a / b;
}

static double hard_f64_div(double a, double b)
{
    return a / b;
}

static bool f32_div_pre(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
}

static bool f64_div_pre(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
}

static bool f32_div_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float32_is_zero(a.s);
}

static bool f64_div_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float64_is_zero(a.s);
}

float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}

bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_div(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}

floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_div(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}
/*
 * Remainder
 */

float32 float32_rem(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float32_round_pack_canonical(pr, status);
}

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float64_round_pack_canonical(pr, status);
}

float128 float128_rem(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float128_round_pack_canonical(pr, status);
}

/*
 * Returns the remainder of the extended double-precision floating-point value
 * `a' with respect to the corresponding value `b'.
 * If 'mod' is false, the operation is performed according to the IEC/IEEE
 * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
 * the remainder based on truncating the quotient toward zero instead and
 * *quotient is set to the low 64 bits of the absolute value of the integer
 * quotient.
 */
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
                         uint64_t *quotient, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    *quotient = 0;
    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }
    pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);

    return floatx80_round_pack_canonical(pr, status);
}

floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, false, &quotient, status);
}

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, true, &quotient, status);
}
/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */

static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}

static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)

static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}

static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float32_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}
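
/*
 * Fast path for widening: every normal float32 value is exactly
 * representable as a float64, so the host conversion below cannot
 * raise an exception and needs no rounding.  Zeros are special-cased
 * to preserve the sign; subnormals, infinities and NaNs (and any
 * flush-input-to-zero handling) go through the softfloat path.
 */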
float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results. */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        return soft_float32_to_float64(a, s);
    }
}

float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float64_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float32 floatx80_to_float32(floatx80 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    if (floatx80_unpack_canonical(&p128, a, s)) {
        parts_float_to_float_narrow(&p64, &p128, s);
    } else {
        parts_default_nan(&p64, s);
    }
    return float32_round_pack_canonical(&p64, s);
}

float64 floatx80_to_float64(floatx80 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    if (floatx80_unpack_canonical(&p128, a, s)) {
        parts_float_to_float_narrow(&p64, &p128, s);
    } else {
        parts_default_nan(&p64, s);
    }
    return float64_round_pack_canonical(&p64, s);
}

float128 floatx80_to_float128(floatx80 a, float_status *s)
{
    FloatParts128 p;

    if (floatx80_unpack_canonical(&p, a, s)) {
        parts_float_to_float(&p, s);
    } else {
        parts_default_nan(&p, s);
    }
    return float128_round_pack_canonical(&p, s);
}

floatx80 float32_to_floatx80(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return floatx80_round_pack_canonical(&p128, s);
}

floatx80 float64_to_floatx80(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return floatx80_round_pack_canonical(&p128, s);
}

floatx80 float128_to_floatx80(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return floatx80_round_pack_canonical(&p, s);
}
/*
 * Round to integral value
 */

float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}

floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }

    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
                       &floatx80_params[status->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, status);
}
/*
 * Floating-point to signed integer conversions
 */
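
/*
 * The _scalbn variants convert a * 2**scale, saturating to the given
 * [min, max] range; they back targets whose float-to-fixed-point
 * instructions take a fractional-bits operand (e.g. Arm's VCVT).
 * For example, float32_to_int32_scalbn(x, rmode, 7, s) converts
 * x * 2**7 to an int32.  The plain and _round_to_zero entry points
 * below are thin wrappers with scale 0.
 */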
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        parts_default_nan(&p, s);
    }
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        parts_default_nan(&p, s);
    }
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t floatx80_to_int32(floatx80 a, float_status *s)
{
    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t floatx80_to_int64(floatx80 a, float_status *s)
{
    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
{
    return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
{
    return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
/*
 * Floating-point to unsigned integer conversions
 */
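
/*
 * As above, but saturating to [0, UINT*_MAX] via parts_float_to_uint:
 * out-of-range inputs, including negative values that do not round to
 * zero, raise Invalid and return the nearest bound.
 */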
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
/*
 * Signed integer to floating-point conversions
 */

float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
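
/*
 * The int64 -> float32/float64 conversions below have a hardfloat
 * fast path for the common scale == 0 case.  It is safe because
 * can_use_fpu() only returns true when float_flag_inexact is already
 * set (the flag is sticky) and the rounding mode is nearest-even, so
 * the host's implicit integer-to-float conversion rounds identically
 * and any inexactness it silently introduces is already recorded.
 */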
float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        union_float32 ur;
        ur.h = a;
        return ur.s;
    }

    parts64_sint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        union_float64 ur;
        ur.h = a;
        return ur.s;
    }

    parts_sint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}

floatx80 int64_to_floatx80(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return floatx80_round_pack_canonical(&p, status);
}

floatx80 int32_to_floatx80(int32_t a, float_status *status)
{
    return int64_to_floatx80(a, status);
}
/*
 * Unsigned Integer to floating-point conversions
 */

float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
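
/*
 * The scale == 0 fast paths below mirror the signed conversions
 * above and rest on the same sticky-inexact argument.
 */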
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        union_float32 ur;
        ur.h = a;
        return ur.s;
    }

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        union_float64 ur;
        ur.h = a;
        return ur.s;
    }

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}
/*
 * Minimum and maximum
 */

static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}
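
/*
 * The flag bits select among the IEEE min/max flavours: plain
 * min/max propagate NaNs, the *num variants implement the 754-2008
 * minNum/maxNum behaviour of preferring a number over a quiet NaN,
 * the *nummag variants compare by magnitude, and minimum_number/
 * maximum_number implement the 754-2019 operations.
 */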
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0) \
    MINMAX_1(type, maxnum, minmax_isnum) \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
    MINMAX_1(type, maximum_number, minmax_isnumber) \
    MINMAX_1(type, min, minmax_ismin) \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
/*
 * Floating point compare
 */

static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}

static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}
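
/*
 * The hardfloat compare below uses the C99 quiet comparison macros
 * (isgreater, isgreaterequal, isless), which do not raise Invalid on
 * quiet NaN operands.  If none of the ordered relations holds, the
 * operands are unordered (at least one NaN) and we fall back to
 * softfloat, which sets the exception flags as required.
 */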
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}

static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    if (!floatx80_unpack_canonical(&pa, a, s) ||
        !floatx80_unpack_canonical(&pb, b, s)) {
        return float_relation_unordered;
    }
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
{
    return floatx80_do_compare(a, b, s, false);
}

FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
{
    return floatx80_do_compare(a, b, s, true);
}
/*
 * Scale by 2**N
 */

float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    parts_scalbn(&p, n, status);
    return floatx80_round_pack_canonical(&p, status);
}
/*
 * Square Root
 */

float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float16_params);
    return float16_round_pack_canonical(&p, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
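
/*
 * The hardfloat sqrt fast paths below are valid because IEEE 754
 * requires sqrt to be correctly rounded, so for a non-negative zero
 * or normal input the host result equals the softfloat result under
 * round-to-nearest-even.  Negative, subnormal, infinite and NaN
 * inputs take the soft path; sqrt of a normal number can never
 * underflow to a subnormal, so no post-check on the result is needed.
 */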
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}

float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_sqrt(floatx80 a, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        return floatx80_default_nan(s);
    }
    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, s);
}
/*
 * log2
 */
float32 float32_log2(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_log2(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
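/*
 * Illustrative example (not part of the original file; the helper name
 * is hypothetical): for an exact power of two the base-2 logarithm is
 * an exact small integer, so no inexact flag should be raised here.
 */
static inline float64 __attribute__((unused))
float64_example_log2_of_8(float_status *s)
{
    return float64_log2(make_float64(0x4020000000000000), s); /* 3.0 */
}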
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/

float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
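/*
 * For example, on a target whose default NaN is positive with only the
 * most-significant fraction bit set, float32_default_nan() returns the
 * bit pattern 0x7FC00000; the actual sign and fraction pattern is
 * chosen per target by parts_default_nan().
 */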
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/

float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
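/*
 * For example, on a target where NaNs are quietened by setting the
 * most-significant fraction bit, float32_silence_nan() turns the
 * signalling NaN 0x7F800001 into the quiet NaN 0x7FC00001, leaving the
 * rest of the payload intact.
 */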
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/

static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    if (p.exp == 0 && p.frac != 0) {
        float_raise(float_flag_input_denormal, status);
        return true;
    }

    return false;
}

float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}
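/*
 * Illustrative example (not part of the original file; the helper name
 * is hypothetical): with flush_inputs_to_zero set, the smallest
 * positive float32 subnormal is squashed to +0.0 and
 * float_flag_input_denormal is raised in the status word.
 */
static inline float32 __attribute__((unused))
float32_example_squash_subnormal(float_status *s)
{
    s->flush_inputs_to_zero = true;
    return float32_squash_input_denormal(make_float32(0x00000001), s);
}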
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'. The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;
}
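/*
 * Worked example: for aSig == 1, clz64 returns 63, so the significand
 * becomes 0x8000000000000000 (explicit integer bit set) and the
 * exponent 1 - 63 = -62.
 */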
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input. Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
| If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
| the result is rounded to the same number of bits as single or double
| precision, respectively. Otherwise, the result is rounded to the full
| precision of the extended double-precision format.
| The input significand must be normalized or smaller. If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding. The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );
}
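/*
 * Note on the rounding masks above: for floatx80_precision_d the low
 * 11 bits of the 64-bit significand are rounded away (64 - 11 = 53
 * significant bits, as in double precision), and for
 * floatx80_precision_s the low 40 bits are rounded away (64 - 40 = 24
 * significant bits, as in single precision).
 */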
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input. This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/

floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
                                       bool zSign, int32_t zExp,
                                       uint64_t zSig0, uint64_t zSig1,
                                       float_status *status)
{
    int8_t shiftCount;

    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
                                zSig0, zSig1, status);
}
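/*
 * Worked example: if zSig0 == 0 and zSig1 == 1, the significand is
 * first promoted by one whole 64-bit word (zExp -= 64), then clz64
 * reports 63 leading zeros, so the pair is shifted left until the
 * integer bit is set and the exponent is reduced by 63 more.
 */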
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
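/*
 * The entry labelled n above is 1/n! rounded to double precision, e.g.
 * 0x3fe0000000000000 is 0.5 (1/2!) and 0x3fc5555555555555 is the
 * nearest double to 1/6 (1/3!), matching the Taylor series in the
 * comment before the table.
 */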
float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 xp, xnp, tp, rp;
    int i;

    float32_unpack_canonical(&xp, a, status);
    if (unlikely(xp.cls != float_class_normal)) {
        switch (xp.cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(&xp, status);
            return float32_round_pack_canonical(&xp, status);
        case float_class_inf:
            return xp.sign ? float32_zero : a;
        case float_class_zero:
            return float32_one;
        default:
            break;
        }
        g_assert_not_reached();
    }

    float_raise(float_flag_inexact, status);

    float64_unpack_canonical(&tp, float64_ln2, status);
    xp = *parts_mul(&xp, &tp, status);
    xnp = xp;

    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
        /*
         * Accumulate the Taylor series: term i adds x**(i+1) / (i+1)!,
         * with the rising power carried in xnp.  (Multiplying by xp
         * here instead of xnp would evaluate every term at x**1.)
         */
        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

    return float32_round_pack_canonical(&rp, status);
}
/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the
| result as an extended double-precision floating-point value.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    return floatx80_round_pack_canonical(&p, status);
}
static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}