fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*
 405  * Classify a floating point number. Everything above float_class_qnan
 406  * is a NaN so cls >= float_class_qnan is any NaN.
 407  */
 408
 409 typedef enum __attribute__ ((__packed__)) {
 410     float_class_unclassified,
 411     float_class_zero,
 412     float_class_normal,
 413     float_class_inf,
 414     float_class_qnan,  /* all NaNs from here */
 415     float_class_snan,
 416 } FloatClass;
 417
 418 #define float_cmask(bit)  (1u << (bit))
 419
 420 enum {
 421     float_cmask_zero    = float_cmask(float_class_zero),
 422     float_cmask_normal  = float_cmask(float_class_normal),
 423     float_cmask_inf     = float_cmask(float_class_inf),
 424     float_cmask_qnan    = float_cmask(float_class_qnan),
 425     float_cmask_snan    = float_cmask(float_class_snan),
 426
 427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 429 };
 430
 431 /* Flags for parts_minmax. */
 432 enum {
 433     /* Set for minimum; clear for maximum. */
 434     minmax_ismin = 1,
 435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
 436     minmax_isnum = 2,
 437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
 438     minmax_ismag = 4,
 439 };
 440
 441 /* Simple helpers for checking if, or what kind of, NaN we have */
 442 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 443 {
 444     return unlikely(c >= float_class_qnan);
 445 }
 446
 447 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 448 {
 449     return c == float_class_snan;
 450 }
 451
 452 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 453 {
 454     return c == float_class_qnan;
 455 }
 456
 457 /*
 458  * Structure holding all of the decomposed parts of a float.
 459  * The exponent is unbiased and the fraction is normalized.
 460  *
 461  * The fraction words are stored in big-endian word ordering,
 462  * so that truncation from a larger format to a smaller format
 463  * can be done simply by ignoring subsequent elements.
 464  */
 465
 466 typedef struct {
 467     FloatClass cls;
 468     bool sign;
 469     int32_t exp;
 470     union {
 471         /* Routines that know the structure may reference the singular name. */
 472         uint64_t frac;
 473         /*
 474          * Routines expanded with multiple structures reference "hi" and "lo"
 475          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 476          * both the same word and aliased here.
 477          */
 478         uint64_t frac_hi;
 479         uint64_t frac_lo;
 480     };
 481 } FloatParts64;
 482
 483 typedef struct {
 484     FloatClass cls;
 485     bool sign;
 486     int32_t exp;
 487     uint64_t frac_hi;
 488     uint64_t frac_lo;
 489 } FloatParts128;
 490
 491 typedef struct {
 492     FloatClass cls;
 493     bool sign;
 494     int32_t exp;
 495     uint64_t frac_hi;
 496     uint64_t frac_hm;  /* high-middle */
 497     uint64_t frac_lm;  /* low-middle */
 498     uint64_t frac_lo;
 499 } FloatParts256;
 500
 501 /* These apply to the most significant word of each FloatPartsN. */
 502 #define DECOMPOSED_BINARY_POINT    63
 503 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 504
 505 /* Structure holding all of the relevant parameters for a format.
 506  *   exp_size: the size of the exponent field
 507  *   exp_bias: the offset applied to the exponent field
 508  *   exp_max: the maximum normalised exponent
 509  *   frac_size: the size of the fraction field
 510  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 511  * The following are computed based the size of fraction
 512  *   round_mask: bits below lsb which must be rounded
 513  * The following optional modifiers are available:
 514  *   arm_althp: handle ARM Alternative Half Precision
 515  */
 516 typedef struct {
 517     int exp_size;
 518     int exp_bias;
 519     int exp_max;
 520     int frac_size;
 521     int frac_shift;
 522     bool arm_althp;
 523     uint64_t round_mask;
 524 } FloatFmt;
 525
 526 /* Expand fields based on the size of exponent and fraction */
 527 #define FLOAT_PARAMS_(E)                                \
 528     .exp_size       = E,                                \
 529     .exp_bias       = ((1 << E) - 1) >> 1,              \
 530     .exp_max        = (1 << E) - 1
 531
 532 #define FLOAT_PARAMS(E, F)                              \
 533     FLOAT_PARAMS_(E),                                   \
 534     .frac_size      = F,                                \
 535     .frac_shift     = (-F - 1) & 63,                    \
 536     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
 537
 538 static const FloatFmt float16_params = {
 539     FLOAT_PARAMS(5, 10)
 540 };
 541
 542 static const FloatFmt float16_params_ahp = {
 543     FLOAT_PARAMS(5, 10),
 544     .arm_althp = true
 545 };
 546
 547 static const FloatFmt bfloat16_params = {
 548     FLOAT_PARAMS(8, 7)
 549 };
 550
 551 static const FloatFmt float32_params = {
 552     FLOAT_PARAMS(8, 23)
 553 };
 554
 555 static const FloatFmt float64_params = {
 556     FLOAT_PARAMS(11, 52)
 557 };
 558
 559 static const FloatFmt float128_params = {
 560     FLOAT_PARAMS(15, 112)
 561 };
 562
 563 #define FLOATX80_PARAMS(R)              \
 564     FLOAT_PARAMS_(15),                  \
 565     .frac_size = R == 64 ? 63 : R,      \
 566     .frac_shift = 0,                    \
 567     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 568
 569 static const FloatFmt floatx80_params[3] = {
 570     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
 571     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
 572     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
 573 };
 574
 575 /* Unpack a float to parts, but do not canonicalize.  */
 576 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 577 {
 578     const int f_size = fmt->frac_size;
 579     const int e_size = fmt->exp_size;
 580
 581     *r = (FloatParts64) {
 582         .cls = float_class_unclassified,
 583         .sign = extract64(raw, f_size + e_size, 1),
 584         .exp = extract64(raw, f_size, e_size),
 585         .frac = extract64(raw, 0, f_size)
 586     };
 587 }
 588
 589 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 590 {
 591     unpack_raw64(p, &float16_params, f);
 592 }
 593
 594 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 595 {
 596     unpack_raw64(p, &bfloat16_params, f);
 597 }
 598
 599 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 600 {
 601     unpack_raw64(p, &float32_params, f);
 602 }
 603
 604 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 605 {
 606     unpack_raw64(p, &float64_params, f);
 607 }
 608
 609 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 610 {
 611     *p = (FloatParts128) {
 612         .cls = float_class_unclassified,
 613         .sign = extract32(f.high, 15, 1),
 614         .exp = extract32(f.high, 0, 15),
 615         .frac_hi = f.low
 616     };
 617 }
 618
 619 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 620 {
 621     const int f_size = float128_params.frac_size - 64;
 622     const int e_size = float128_params.exp_size;
 623
 624     *p = (FloatParts128) {
 625         .cls = float_class_unclassified,
 626         .sign = extract64(f.high, f_size + e_size, 1),
 627         .exp = extract64(f.high, f_size, e_size),
 628         .frac_hi = extract64(f.high, 0, f_size),
 629         .frac_lo = f.low,
 630     };
 631 }
 632
 633 /* Pack a float from parts, but do not canonicalize.  */
 634 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 635 {
 636     const int f_size = fmt->frac_size;
 637     const int e_size = fmt->exp_size;
 638     uint64_t ret;
 639
 640     ret = (uint64_t)p->sign << (f_size + e_size);
 641     ret = deposit64(ret, f_size, e_size, p->exp);
 642     ret = deposit64(ret, 0, f_size, p->frac);
 643     return ret;
 644 }
 645
 646 static inline float16 float16_pack_raw(const FloatParts64 *p)
 647 {
 648     return make_float16(pack_raw64(p, &float16_params));
 649 }
 650
 651 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 652 {
 653     return pack_raw64(p, &bfloat16_params);
 654 }
 655
 656 static inline float32 float32_pack_raw(const FloatParts64 *p)
 657 {
 658     return make_float32(pack_raw64(p, &float32_params));
 659 }
 660
 661 static inline float64 float64_pack_raw(const FloatParts64 *p)
 662 {
 663     return make_float64(pack_raw64(p, &float64_params));
 664 }
 665
 666 static float128 float128_pack_raw(const FloatParts128 *p)
 667 {
 668     const int f_size = float128_params.frac_size - 64;
 669     const int e_size = float128_params.exp_size;
 670     uint64_t hi;
 671
 672     hi = (uint64_t)p->sign << (f_size + e_size);
 673     hi = deposit64(hi, f_size, e_size, p->exp);
 674     hi = deposit64(hi, 0, f_size, p->frac_hi);
 675     return make_float128(hi, p->frac_lo);
 676 }
 677
 678 /*----------------------------------------------------------------------------
 679 | Functions and definitions to determine:  (1) whether tininess for underflow
 680 | is detected before or after rounding by default, (2) what (if anything)
 681 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 682 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 683 | are propagated from function inputs to output.  These details are target-
 684 | specific.
 685 *----------------------------------------------------------------------------*/
 686 #include "softfloat-specialize.c.inc"
 687
 688 #define PARTS_GENERIC_64_128(NAME, P) \
 689     _Generic((P), FloatParts64 *: parts64_##NAME, \
 690                   FloatParts128 *: parts128_##NAME)
 691
 692 #define PARTS_GENERIC_64_128_256(NAME, P) \
 693     _Generic((P), FloatParts64 *: parts64_##NAME, \
 694                   FloatParts128 *: parts128_##NAME, \
 695                   FloatParts256 *: parts256_##NAME)
 696
 697 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 698 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 699
 700 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 701 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 702
 703 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 704
 705 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 706                                       float_status *s);
 707 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 708                                         float_status *s);
 709
 710 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 711
 712 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 713                                              FloatParts64 *c, float_status *s,
 714                                              int ab_mask, int abc_mask);
 715 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 716                                                FloatParts128 *b,
 717                                                FloatParts128 *c,
 718                                                float_status *s,
 719                                                int ab_mask, int abc_mask);
 720
 721 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 722     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 723
 724 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 725                                  const FloatFmt *fmt);
 726 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 727                                   const FloatFmt *fmt);
 728
 729 #define parts_canonicalize(A, S, F) \
 730     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 731
 732 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
 733                                    const FloatFmt *fmt);
 734 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
 735                                     const FloatFmt *fmt);
 736
 737 #define parts_uncanon_normal(A, S, F) \
 738     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
 739
 740 static void parts64_uncanon(FloatParts64 *p, float_status *status,
 741                             const FloatFmt *fmt);
 742 static void parts128_uncanon(FloatParts128 *p, float_status *status,
 743                              const FloatFmt *fmt);
 744
 745 #define parts_uncanon(A, S, F) \
 746     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 747
 748 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 749 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 750 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 751
 752 #define parts_add_normal(A, B) \
 753     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 754
 755 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 756 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 757 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 758
 759 #define parts_sub_normal(A, B) \
 760     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 761
 762 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 763                                     float_status *s, bool subtract);
 764 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 765                                       float_status *s, bool subtract);
 766
 767 #define parts_addsub(A, B, S, Z) \
 768     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 769
 770 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 771                                  float_status *s);
 772 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 773                                    float_status *s);
 774
 775 #define parts_mul(A, B, S) \
 776     PARTS_GENERIC_64_128(mul, A)(A, B, S)
 777
 778 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 779                                     FloatParts64 *c, int flags,
 780                                     float_status *s);
 781 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 782                                       FloatParts128 *c, int flags,
 783                                       float_status *s);
 784
 785 #define parts_muladd(A, B, C, Z, S) \
 786     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 787
 788 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 789                                  float_status *s);
 790 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 791                                    float_status *s);
 792
 793 #define parts_div(A, B, S) \
 794     PARTS_GENERIC_64_128(div, A)(A, B, S)
 795
 796 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
 797                                     uint64_t *mod_quot, float_status *s);
 798 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
 799                                       uint64_t *mod_quot, float_status *s);
 800
 801 #define parts_modrem(A, B, Q, S) \
 802     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
 803
 804 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
 805 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
 806
 807 #define parts_sqrt(A, S, F) \
 808     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 809
 810 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 811                                         int scale, int frac_size);
 812 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 813                                          int scale, int frac_size);
 814
 815 #define parts_round_to_int_normal(A, R, C, F) \
 816     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 817
 818 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 819                                  int scale, float_status *s,
 820                                  const FloatFmt *fmt);
 821 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 822                                   int scale, float_status *s,
 823                                   const FloatFmt *fmt);
 824
 825 #define parts_round_to_int(A, R, C, S, F) \
 826     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 827
 828 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 829                                      int scale, int64_t min, int64_t max,
 830                                      float_status *s);
 831 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 832                                      int scale, int64_t min, int64_t max,
 833                                      float_status *s);
 834
 835 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 836     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 837
 838 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
 839                                       int scale, uint64_t max,
 840                                       float_status *s);
 841 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
 842                                        int scale, uint64_t max,
 843                                        float_status *s);
 844
 845 #define parts_float_to_uint(P, R, Z, M, S) \
 846     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 847
 848 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 849                                   int scale, float_status *s);
 850 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 851                                    int scale, float_status *s);
 852
 853 #define parts_sint_to_float(P, I, Z, S) \
 854     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 855
 856 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
 857                                   int scale, float_status *s);
 858 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
 859                                    int scale, float_status *s);
 860
 861 #define parts_uint_to_float(P, I, Z, S) \
 862     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 863
 864 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
 865                                     float_status *s, int flags);
 866 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
 867                                       float_status *s, int flags);
 868
 869 #define parts_minmax(A, B, S, F) \
 870     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 871
 872 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
 873                            float_status *s, bool q);
 874 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
 875                             float_status *s, bool q);
 876
 877 #define parts_compare(A, B, S, Q) \
 878     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 879
 880 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
 881 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
 882
 883 #define parts_scalbn(A, N, S) \
 884     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 885
 886 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
 887 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
 888
 889 #define parts_log2(A, S, F) \
 890     PARTS_GENERIC_64_128(log2, A)(A, S, F)
 891
 892 /*
 893  * Helper functions for softfloat-parts.c.inc, per-size operations.
 894  */
 895
 896 #define FRAC_GENERIC_64_128(NAME, P) \
 897     _Generic((P), FloatParts64 *: frac64_##NAME, \
 898                   FloatParts128 *: frac128_##NAME)
 899
 900 #define FRAC_GENERIC_64_128_256(NAME, P) \
 901     _Generic((P), FloatParts64 *: frac64_##NAME, \
 902                   FloatParts128 *: frac128_##NAME, \
 903                   FloatParts256 *: frac256_##NAME)
 904
 905 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 906 {
 907     return uadd64_overflow(a->frac, b->frac, &r->frac);
 908 }
 909
 910 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 911 {
 912     bool c = 0;
 913     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 914     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 915     return c;
 916 }
 917
 918 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 919 {
 920     bool c = 0;
 921     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 922     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 923     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 924     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 925     return c;
 926 }
 927
 928 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 929
 930 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 931 {
 932     return uadd64_overflow(a->frac, c, &r->frac);
 933 }
 934
 935 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 936 {
 937     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 938     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 939 }
 940
 941 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 942
 943 static void frac64_allones(FloatParts64 *a)
 944 {
 945     a->frac = -1;
 946 }
 947
 948 static void frac128_allones(FloatParts128 *a)
 949 {
 950     a->frac_hi = a->frac_lo = -1;
 951 }
 952
 953 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 954
 955 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 956 {
 957     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 958 }
 959
 960 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 961 {
 962     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 963     if (ta == tb) {
 964         ta = a->frac_lo, tb = b->frac_lo;
 965         if (ta == tb) {
 966             return 0;
 967         }
 968     }
 969     return ta < tb ? -1 : 1;
 970 }
 971
 972 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 973
 974 static void frac64_clear(FloatParts64 *a)
 975 {
 976     a->frac = 0;
 977 }
 978
 979 static void frac128_clear(FloatParts128 *a)
 980 {
 981     a->frac_hi = a->frac_lo = 0;
 982 }
 983
 984 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 985
 986 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 987 {
 988     uint64_t n1, n0, r, q;
 989     bool ret;
 990
 991     /*
 992      * We want a 2*N / N-bit division to produce exactly an N-bit
 993      * result, so that we do not lose any precision and so that we
 994      * do not have to renormalize afterward.  If A.frac < B.frac,
 995      * then division would produce an (N-1)-bit result; shift A left
 996      * by one to produce the an N-bit result, and return true to
 997      * decrement the exponent to match.
 998      *
 999      * The udiv_qrnnd algorithm that we're using requires normalization,
1000      * i.e. the msb of the denominator must be set, which is already true.
1001      */
1002     ret = a->frac < b->frac;
1003     if (ret) {
1004         n0 = a->frac;
1005         n1 = 0;
1006     } else {
1007         n0 = a->frac >> 1;
1008         n1 = a->frac << 63;
1009     }
1010     q = udiv_qrnnd(&r, n0, n1, b->frac);
1011
1012     /* Set lsb if there is a remainder, to set inexact. */
1013     a->frac = q | (r != 0);
1014
1015     return ret;
1016 }
1017
1018 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1019 {
1020     uint64_t q0, q1, a0, a1, b0, b1;
1021     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1022     bool ret = false;
1023
1024     a0 = a->frac_hi, a1 = a->frac_lo;
1025     b0 = b->frac_hi, b1 = b->frac_lo;
1026
1027     ret = lt128(a0, a1, b0, b1);
1028     if (!ret) {
1029         a1 = shr_double(a0, a1, 1);
1030         a0 = a0 >> 1;
1031     }
1032
1033     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1034     q0 = estimateDiv128To64(a0, a1, b0);
1035
1036     /*
1037      * Estimate is high because B1 was not included (unless B1 == 0).
1038      * Reduce quotient and increase remainder until remainder is non-negative.
1039      * This loop will execute 0 to 2 times.
1040      */
1041     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1042     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1043     while (r0 != 0) {
1044         q0--;
1045         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1046     }
1047
1048     /* Repeat using the remainder, producing a second word of quotient. */
1049     q1 = estimateDiv128To64(r1, r2, b0);
1050     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1051     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1052     while (r1 != 0) {
1053         q1--;
1054         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1055     }
1056
1057     /* Any remainder indicates inexact; set sticky bit. */
1058     q1 |= (r2 | r3) != 0;
1059
1060     a->frac_hi = q0;
1061     a->frac_lo = q1;
1062     return ret;
1063 }
1064
1065 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1066
1067 static bool frac64_eqz(FloatParts64 *a)
1068 {
1069     return a->frac == 0;
1070 }
1071
1072 static bool frac128_eqz(FloatParts128 *a)
1073 {
1074     return (a->frac_hi | a->frac_lo) == 0;
1075 }
1076
1077 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1078
1079 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1080 {
1081     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1082 }
1083
1084 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1085 {
1086     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1087                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1088 }
1089
1090 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1091
1092 static void frac64_neg(FloatParts64 *a)
1093 {
1094     a->frac = -a->frac;
1095 }
1096
1097 static void frac128_neg(FloatParts128 *a)
1098 {
1099     bool c = 0;
1100     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1101     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1102 }
1103
1104 static void frac256_neg(FloatParts256 *a)
1105 {
1106     bool c = 0;
1107     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1108     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1109     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1110     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1111 }
1112
1113 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1114
1115 static int frac64_normalize(FloatParts64 *a)
1116 {
1117     if (a->frac) {
1118         int shift = clz64(a->frac);
1119         a->frac <<= shift;
1120         return shift;
1121     }
1122     return 64;
1123 }
1124
1125 static int frac128_normalize(FloatParts128 *a)
1126 {
1127     if (a->frac_hi) {
1128         int shl = clz64(a->frac_hi);
1129         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1130         a->frac_lo <<= shl;
1131         return shl;
1132     } else if (a->frac_lo) {
1133         int shl = clz64(a->frac_lo);
1134         a->frac_hi = a->frac_lo << shl;
1135         a->frac_lo = 0;
1136         return shl + 64;
1137     }
1138     return 128;
1139 }
1140
1141 static int frac256_normalize(FloatParts256 *a)
1142 {
1143     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1144     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1145     int ret, shl;
1146
1147     if (likely(a0)) {
1148         shl = clz64(a0);
1149         if (shl == 0) {
1150             return 0;
1151         }
1152         ret = shl;
1153     } else {
1154         if (a1) {
1155             ret = 64;
1156             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1157         } else if (a2) {
1158             ret = 128;
1159             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1160         } else if (a3) {
1161             ret = 192;
1162             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1163         } else {
1164             ret = 256;
1165             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1166             goto done;
1167         }
1168         shl = clz64(a0);
1169         if (shl == 0) {
1170             goto done;
1171         }
1172         ret += shl;
1173     }
1174
1175     a0 = shl_double(a0, a1, shl);
1176     a1 = shl_double(a1, a2, shl);
1177     a2 = shl_double(a2, a3, shl);
1178     a3 <<= shl;
1179
1180  done:
1181     a->frac_hi = a0;
1182     a->frac_hm = a1;
1183     a->frac_lm = a2;
1184     a->frac_lo = a3;
1185     return ret;
1186 }
1187
1188 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1189
1190 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1191 {
1192     uint64_t a0, a1, b0, t0, t1, q, quot;
1193     int exp_diff = a->exp - b->exp;
1194     int shift;
1195
1196     a0 = a->frac;
1197     a1 = 0;
1198
1199     if (exp_diff < -1) {
1200         if (mod_quot) {
1201             *mod_quot = 0;
1202         }
1203         return;
1204     }
1205     if (exp_diff == -1) {
1206         a0 >>= 1;
1207         exp_diff = 0;
1208     }
1209
1210     b0 = b->frac;
1211     quot = q = b0 <= a0;
1212     if (q) {
1213         a0 -= b0;
1214     }
1215
1216     exp_diff -= 64;
1217     while (exp_diff > 0) {
1218         q = estimateDiv128To64(a0, a1, b0);
1219         q = q > 2 ? q - 2 : 0;
1220         mul64To128(b0, q, &t0, &t1);
1221         sub128(a0, a1, t0, t1, &a0, &a1);
1222         shortShift128Left(a0, a1, 62, &a0, &a1);
1223         exp_diff -= 62;
1224         quot = (quot << 62) + q;
1225     }
1226
1227     exp_diff += 64;
1228     if (exp_diff > 0) {
1229         q = estimateDiv128To64(a0, a1, b0);
1230         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1231         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1232         sub128(a0, a1, t0, t1, &a0, &a1);
1233         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1234         while (le128(t0, t1, a0, a1)) {
1235             ++q;
1236             sub128(a0, a1, t0, t1, &a0, &a1);
1237         }
1238         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1239     } else {
1240         t0 = b0;
1241         t1 = 0;
1242     }
1243
1244     if (mod_quot) {
1245         *mod_quot = quot;
1246     } else {
1247         sub128(t0, t1, a0, a1, &t0, &t1);
1248         if (lt128(t0, t1, a0, a1) ||
1249             (eq128(t0, t1, a0, a1) && (q & 1))) {
1250             a0 = t0;
1251             a1 = t1;
1252             a->sign = !a->sign;
1253         }
1254     }
1255
1256     if (likely(a0)) {
1257         shift = clz64(a0);
1258         shortShift128Left(a0, a1, shift, &a0, &a1);
1259     } else if (likely(a1)) {
1260         shift = clz64(a1);
1261         a0 = a1 << shift;
1262         a1 = 0;
1263         shift += 64;
1264     } else {
1265         a->cls = float_class_zero;
1266         return;
1267     }
1268
1269     a->exp = b->exp + exp_diff - shift;
1270     a->frac = a0 | (a1 != 0);
1271 }
1272
1273 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1274                            uint64_t *mod_quot)
1275 {
1276     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1277     int exp_diff = a->exp - b->exp;
1278     int shift;
1279
1280     a0 = a->frac_hi;
1281     a1 = a->frac_lo;
1282     a2 = 0;
1283
1284     if (exp_diff < -1) {
1285         if (mod_quot) {
1286             *mod_quot = 0;
1287         }
1288         return;
1289     }
1290     if (exp_diff == -1) {
1291         shift128Right(a0, a1, 1, &a0, &a1);
1292         exp_diff = 0;
1293     }
1294
1295     b0 = b->frac_hi;
1296     b1 = b->frac_lo;
1297
1298     quot = q = le128(b0, b1, a0, a1);
1299     if (q) {
1300         sub128(a0, a1, b0, b1, &a0, &a1);
1301     }
1302
1303     exp_diff -= 64;
1304     while (exp_diff > 0) {
1305         q = estimateDiv128To64(a0, a1, b0);
1306         q = q > 4 ? q - 4 : 0;
1307         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1308         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1309         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1310         exp_diff -= 61;
1311         quot = (quot << 61) + q;
1312     }
1313
1314     exp_diff += 64;
1315     if (exp_diff > 0) {
1316         q = estimateDiv128To64(a0, a1, b0);
1317         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1318         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1319         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1320         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1321         while (le192(t0, t1, t2, a0, a1, a2)) {
1322             ++q;
1323             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1324         }
1325         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1326     } else {
1327         t0 = b0;
1328         t1 = b1;
1329         t2 = 0;
1330     }
1331
1332     if (mod_quot) {
1333         *mod_quot = quot;
1334     } else {
1335         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1336         if (lt192(t0, t1, t2, a0, a1, a2) ||
1337             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1338             a0 = t0;
1339             a1 = t1;
1340             a2 = t2;
1341             a->sign = !a->sign;
1342         }
1343     }
1344
1345     if (likely(a0)) {
1346         shift = clz64(a0);
1347         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1348     } else if (likely(a1)) {
1349         shift = clz64(a1);
1350         shortShift128Left(a1, a2, shift, &a0, &a1);
1351         a2 = 0;
1352         shift += 64;
1353     } else if (likely(a2)) {
1354         shift = clz64(a2);
1355         a0 = a2 << shift;
1356         a1 = a2 = 0;
1357         shift += 128;
1358     } else {
1359         a->cls = float_class_zero;
1360         return;
1361     }
1362
1363     a->exp = b->exp + exp_diff - shift;
1364     a->frac_hi = a0;
1365     a->frac_lo = a1 | (a2 != 0);
1366 }
1367
1368 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1369
1370 static void frac64_shl(FloatParts64 *a, int c)
1371 {
1372     a->frac <<= c;
1373 }
1374
1375 static void frac128_shl(FloatParts128 *a, int c)
1376 {
1377     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1378
1379     if (c & 64) {
1380         a0 = a1, a1 = 0;
1381     }
1382
1383     c &= 63;
1384     if (c) {
1385         a0 = shl_double(a0, a1, c);
1386         a1 = a1 << c;
1387     }
1388
1389     a->frac_hi = a0;
1390     a->frac_lo = a1;
1391 }
1392
1393 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1394
1395 static void frac64_shr(FloatParts64 *a, int c)
1396 {
1397     a->frac >>= c;
1398 }
1399
1400 static void frac128_shr(FloatParts128 *a, int c)
1401 {
1402     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1403
1404     if (c & 64) {
1405         a1 = a0, a0 = 0;
1406     }
1407
1408     c &= 63;
1409     if (c) {
1410         a1 = shr_double(a0, a1, c);
1411         a0 = a0 >> c;
1412     }
1413
1414     a->frac_hi = a0;
1415     a->frac_lo = a1;
1416 }
1417
1418 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1419
1420 static void frac64_shrjam(FloatParts64 *a, int c)
1421 {
1422     uint64_t a0 = a->frac;
1423
1424     if (likely(c != 0)) {
1425         if (likely(c < 64)) {
1426             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1427         } else {
1428             a0 = a0 != 0;
1429         }
1430         a->frac = a0;
1431     }
1432 }
1433
1434 static void frac128_shrjam(FloatParts128 *a, int c)
1435 {
1436     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1437     uint64_t sticky = 0;
1438
1439     if (unlikely(c == 0)) {
1440         return;
1441     } else if (likely(c < 64)) {
1442         /* nothing */
1443     } else if (likely(c < 128)) {
1444         sticky = a1;
1445         a1 = a0;
1446         a0 = 0;
1447         c &= 63;
1448         if (c == 0) {
1449             goto done;
1450         }
1451     } else {
1452         sticky = a0 | a1;
1453         a0 = a1 = 0;
1454         goto done;
1455     }
1456
1457     sticky |= shr_double(a1, 0, c);
1458     a1 = shr_double(a0, a1, c);
1459     a0 = a0 >> c;
1460
1461  done:
1462     a->frac_lo = a1 | (sticky != 0);
1463     a->frac_hi = a0;
1464 }
1465
1466 static void frac256_shrjam(FloatParts256 *a, int c)
1467 {
1468     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1469     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1470     uint64_t sticky = 0;
1471
1472     if (unlikely(c == 0)) {
1473         return;
1474     } else if (likely(c < 64)) {
1475         /* nothing */
1476     } else if (likely(c < 256)) {
1477         if (unlikely(c & 128)) {
1478             sticky |= a2 | a3;
1479             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1480         }
1481         if (unlikely(c & 64)) {
1482             sticky |= a3;
1483             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1484         }
1485         c &= 63;
1486         if (c == 0) {
1487             goto done;
1488         }
1489     } else {
1490         sticky = a0 | a1 | a2 | a3;
1491         a0 = a1 = a2 = a3 = 0;
1492         goto done;
1493     }
1494
1495     sticky |= shr_double(a3, 0, c);
1496     a3 = shr_double(a2, a3, c);
1497     a2 = shr_double(a1, a2, c);
1498     a1 = shr_double(a0, a1, c);
1499     a0 = a0 >> c;
1500
1501  done:
1502     a->frac_lo = a3 | (sticky != 0);
1503     a->frac_lm = a2;
1504     a->frac_hm = a1;
1505     a->frac_hi = a0;
1506 }
1507
1508 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1509
1510 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1511 {
1512     return usub64_overflow(a->frac, b->frac, &r->frac);
1513 }
1514
1515 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1516 {
1517     bool c = 0;
1518     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1519     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1520     return c;
1521 }
1522
1523 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1524 {
1525     bool c = 0;
1526     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1527     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1528     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1529     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1530     return c;
1531 }
1532
1533 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1534
1535 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1536 {
1537     r->frac = a->frac_hi | (a->frac_lo != 0);
1538 }
1539
1540 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1541 {
1542     r->frac_hi = a->frac_hi;
1543     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1544 }
1545
1546 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1547
1548 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1549 {
1550     r->frac_hi = a->frac;
1551     r->frac_lo = 0;
1552 }
1553
1554 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1555 {
1556     r->frac_hi = a->frac_hi;
1557     r->frac_hm = a->frac_lo;
1558     r->frac_lm = 0;
1559     r->frac_lo = 0;
1560 }
1561
1562 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1563
1564 /*
1565  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1566  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1567  * and thus MIT licenced.
1568  */
1569 static const uint16_t rsqrt_tab[128] = {
1570     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1571     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1572     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1573     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1574     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1575     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1576     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1577     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1578     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1579     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1580     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1581     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1582     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1583     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1584     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1585     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1586 };
1587
1588 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1589 #define FloatPartsN    glue(FloatParts,N)
1590 #define FloatPartsW    glue(FloatParts,W)
1591
1592 #define N 64
1593 #define W 128
1594
1595 #include "softfloat-parts-addsub.c.inc"
1596 #include "softfloat-parts.c.inc"
1597
1598 #undef  N
1599 #undef  W
1600 #define N 128
1601 #define W 256
1602
1603 #include "softfloat-parts-addsub.c.inc"
1604 #include "softfloat-parts.c.inc"
1605
1606 #undef  N
1607 #undef  W
1608 #define N            256
1609
1610 #include "softfloat-parts-addsub.c.inc"
1611
1612 #undef  N
1613 #undef  W
1614 #undef  partsN
1615 #undef  FloatPartsN
1616 #undef  FloatPartsW
1617
1618 /*
1619  * Pack/unpack routines with a specific FloatFmt.
1620  */
1621
1622 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1623                                       float_status *s, const FloatFmt *params)
1624 {
1625     float16_unpack_raw(p, f);
1626     parts_canonicalize(p, s, params);
1627 }
1628
1629 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1630                                      float_status *s)
1631 {
1632     float16a_unpack_canonical(p, f, s, &float16_params);
1633 }
1634
1635 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1636                                       float_status *s)
1637 {
1638     bfloat16_unpack_raw(p, f);
1639     parts_canonicalize(p, s, &bfloat16_params);
1640 }
1641
1642 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1643                                              float_status *s,
1644                                              const FloatFmt *params)
1645 {
1646     parts_uncanon(p, s, params);
1647     return float16_pack_raw(p);
1648 }
1649
1650 static float16 float16_round_pack_canonical(FloatParts64 *p,
1651                                             float_status *s)
1652 {
1653     return float16a_round_pack_canonical(p, s, &float16_params);
1654 }
1655
1656 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1657                                               float_status *s)
1658 {
1659     parts_uncanon(p, s, &bfloat16_params);
1660     return bfloat16_pack_raw(p);
1661 }
1662
1663 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1664                                      float_status *s)
1665 {
1666     float32_unpack_raw(p, f);
1667     parts_canonicalize(p, s, &float32_params);
1668 }
1669
1670 static float32 float32_round_pack_canonical(FloatParts64 *p,
1671                                             float_status *s)
1672 {
1673     parts_uncanon(p, s, &float32_params);
1674     return float32_pack_raw(p);
1675 }
1676
1677 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1678                                      float_status *s)
1679 {
1680     float64_unpack_raw(p, f);
1681     parts_canonicalize(p, s, &float64_params);
1682 }
1683
1684 static float64 float64_round_pack_canonical(FloatParts64 *p,
1685                                             float_status *s)
1686 {
1687     parts_uncanon(p, s, &float64_params);
1688     return float64_pack_raw(p);
1689 }
1690
1691 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1692                                       float_status *s)
1693 {
1694     float128_unpack_raw(p, f);
1695     parts_canonicalize(p, s, &float128_params);
1696 }
1697
1698 static float128 float128_round_pack_canonical(FloatParts128 *p,
1699                                               float_status *s)
1700 {
1701     parts_uncanon(p, s, &float128_params);
1702     return float128_pack_raw(p);
1703 }
1704
1705 /* Returns false if the encoding is invalid. */
1706 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1707                                       float_status *s)
1708 {
1709     /* Ensure rounding precision is set before beginning. */
1710     switch (s->floatx80_rounding_precision) {
1711     case floatx80_precision_x:
1712     case floatx80_precision_d:
1713     case floatx80_precision_s:
1714         break;
1715     default:
1716         g_assert_not_reached();
1717     }
1718
1719     if (unlikely(floatx80_invalid_encoding(f))) {
1720         float_raise(float_flag_invalid, s);
1721         return false;
1722     }
1723
1724     floatx80_unpack_raw(p, f);
1725
1726     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1727         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1728     } else {
1729         /* The explicit integer bit is ignored, after invalid checks. */
1730         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1731         p->cls = (p->frac_hi == 0 ? float_class_inf
1732                   : parts_is_snan_frac(p->frac_hi, s)
1733                   ? float_class_snan : float_class_qnan);
1734     }
1735     return true;
1736 }
1737
1738 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1739                                               float_status *s)
1740 {
1741     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1742     uint64_t frac;
1743     int exp;
1744
1745     switch (p->cls) {
1746     case float_class_normal:
1747         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1748             parts_uncanon_normal(p, s, fmt);
1749             frac = p->frac_hi;
1750             exp = p->exp;
1751         } else {
1752             FloatParts64 p64;
1753
1754             p64.sign = p->sign;
1755             p64.exp = p->exp;
1756             frac_truncjam(&p64, p);
1757             parts_uncanon_normal(&p64, s, fmt);
1758             frac = p64.frac;
1759             exp = p64.exp;
1760         }
1761         if (exp != fmt->exp_max) {
1762             break;
1763         }
1764         /* rounded to inf -- fall through to set frac correctly */
1765
1766     case float_class_inf:
1767         /* x86 and m68k differ in the setting of the integer bit. */
1768         frac = floatx80_infinity_low;
1769         exp = fmt->exp_max;
1770         break;
1771
1772     case float_class_zero:
1773         frac = 0;
1774         exp = 0;
1775         break;
1776
1777     case float_class_snan:
1778     case float_class_qnan:
1779         /* NaNs have the integer bit set. */
1780         frac = p->frac_hi | (1ull << 63);
1781         exp = fmt->exp_max;
1782         break;
1783
1784     default:
1785         g_assert_not_reached();
1786     }
1787
1788     return packFloatx80(p->sign, exp, frac);
1789 }
1790
1791 /*
1792  * Addition and subtraction
1793  */
1794
1795 static float16 QEMU_FLATTEN
1796 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1797 {
1798     FloatParts64 pa, pb, *pr;
1799
1800     float16_unpack_canonical(&pa, a, status);
1801     float16_unpack_canonical(&pb, b, status);
1802     pr = parts_addsub(&pa, &pb, status, subtract);
1803
1804     return float16_round_pack_canonical(pr, status);
1805 }
1806
1807 float16 float16_add(float16 a, float16 b, float_status *status)
1808 {
1809     return float16_addsub(a, b, status, false);
1810 }
1811
1812 float16 float16_sub(float16 a, float16 b, float_status *status)
1813 {
1814     return float16_addsub(a, b, status, true);
1815 }
1816
1817 static float32 QEMU_SOFTFLOAT_ATTR
1818 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1819 {
1820     FloatParts64 pa, pb, *pr;
1821
1822     float32_unpack_canonical(&pa, a, status);
1823     float32_unpack_canonical(&pb, b, status);
1824     pr = parts_addsub(&pa, &pb, status, subtract);
1825
1826     return float32_round_pack_canonical(pr, status);
1827 }
1828
1829 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1830 {
1831     return soft_f32_addsub(a, b, status, false);
1832 }
1833
1834 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1835 {
1836     return soft_f32_addsub(a, b, status, true);
1837 }
1838
1839 static float64 QEMU_SOFTFLOAT_ATTR
1840 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1841 {
1842     FloatParts64 pa, pb, *pr;
1843
1844     float64_unpack_canonical(&pa, a, status);
1845     float64_unpack_canonical(&pb, b, status);
1846     pr = parts_addsub(&pa, &pb, status, subtract);
1847
1848     return float64_round_pack_canonical(pr, status);
1849 }
1850
1851 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1852 {
1853     return soft_f64_addsub(a, b, status, false);
1854 }
1855
1856 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1857 {
1858     return soft_f64_addsub(a, b, status, true);
1859 }
1860
1861 static float hard_f32_add(float a, float b)
1862 {
1863     return a + b;
1864 }
1865
1866 static float hard_f32_sub(float a, float b)
1867 {
1868     return a - b;
1869 }
1870
1871 static double hard_f64_add(double a, double b)
1872 {
1873     return a + b;
1874 }
1875
1876 static double hard_f64_sub(double a, double b)
1877 {
1878     return a - b;
1879 }
1880
1881 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1882 {
1883     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1884         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1885     }
1886     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1887 }
1888
1889 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1890 {
1891     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1892         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1893     } else {
1894         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1895     }
1896 }
1897
1898 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1899                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1900 {
1901     return float32_gen2(a, b, s, hard, soft,
1902                         f32_is_zon2, f32_addsubmul_post);
1903 }
1904
1905 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1906                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1907 {
1908     return float64_gen2(a, b, s, hard, soft,
1909                         f64_is_zon2, f64_addsubmul_post);
1910 }
1911
1912 float32 QEMU_FLATTEN
1913 float32_add(float32 a, float32 b, float_status *s)
1914 {
1915     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1916 }
1917
1918 float32 QEMU_FLATTEN
1919 float32_sub(float32 a, float32 b, float_status *s)
1920 {
1921     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1922 }
1923
1924 float64 QEMU_FLATTEN
1925 float64_add(float64 a, float64 b, float_status *s)
1926 {
1927     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1928 }
1929
1930 float64 QEMU_FLATTEN
1931 float64_sub(float64 a, float64 b, float_status *s)
1932 {
1933     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1934 }
1935
1936 static bfloat16 QEMU_FLATTEN
1937 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1938 {
1939     FloatParts64 pa, pb, *pr;
1940
1941     bfloat16_unpack_canonical(&pa, a, status);
1942     bfloat16_unpack_canonical(&pb, b, status);
1943     pr = parts_addsub(&pa, &pb, status, subtract);
1944
1945     return bfloat16_round_pack_canonical(pr, status);
1946 }
1947
1948 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1949 {
1950     return bfloat16_addsub(a, b, status, false);
1951 }
1952
1953 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1954 {
1955     return bfloat16_addsub(a, b, status, true);
1956 }
1957
1958 static float128 QEMU_FLATTEN
1959 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1960 {
1961     FloatParts128 pa, pb, *pr;
1962
1963     float128_unpack_canonical(&pa, a, status);
1964     float128_unpack_canonical(&pb, b, status);
1965     pr = parts_addsub(&pa, &pb, status, subtract);
1966
1967     return float128_round_pack_canonical(pr, status);
1968 }
1969
1970 float128 float128_add(float128 a, float128 b, float_status *status)
1971 {
1972     return float128_addsub(a, b, status, false);
1973 }
1974
1975 float128 float128_sub(float128 a, float128 b, float_status *status)
1976 {
1977     return float128_addsub(a, b, status, true);
1978 }
1979
1980 static floatx80 QEMU_FLATTEN
1981 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1982 {
1983     FloatParts128 pa, pb, *pr;
1984
1985     if (!floatx80_unpack_canonical(&pa, a, status) ||
1986         !floatx80_unpack_canonical(&pb, b, status)) {
1987         return floatx80_default_nan(status);
1988     }
1989
1990     pr = parts_addsub(&pa, &pb, status, subtract);
1991     return floatx80_round_pack_canonical(pr, status);
1992 }
1993
1994 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1995 {
1996     return floatx80_addsub(a, b, status, false);
1997 }
1998
1999 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2000 {
2001     return floatx80_addsub(a, b, status, true);
2002 }
2003
2004 /*
2005  * Multiplication
2006  */
2007
2008 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2009 {
2010     FloatParts64 pa, pb, *pr;
2011
2012     float16_unpack_canonical(&pa, a, status);
2013     float16_unpack_canonical(&pb, b, status);
2014     pr = parts_mul(&pa, &pb, status);
2015
2016     return float16_round_pack_canonical(pr, status);
2017 }
2018
2019 static float32 QEMU_SOFTFLOAT_ATTR
2020 soft_f32_mul(float32 a, float32 b, float_status *status)
2021 {
2022     FloatParts64 pa, pb, *pr;
2023
2024     float32_unpack_canonical(&pa, a, status);
2025     float32_unpack_canonical(&pb, b, status);
2026     pr = parts_mul(&pa, &pb, status);
2027
2028     return float32_round_pack_canonical(pr, status);
2029 }
2030
2031 static float64 QEMU_SOFTFLOAT_ATTR
2032 soft_f64_mul(float64 a, float64 b, float_status *status)
2033 {
2034     FloatParts64 pa, pb, *pr;
2035
2036     float64_unpack_canonical(&pa, a, status);
2037     float64_unpack_canonical(&pb, b, status);
2038     pr = parts_mul(&pa, &pb, status);
2039
2040     return float64_round_pack_canonical(pr, status);
2041 }
2042
2043 static float hard_f32_mul(float a, float b)
2044 {
2045     return a * b;
2046 }
2047
2048 static double hard_f64_mul(double a, double b)
2049 {
2050     return a * b;
2051 }
2052
2053 float32 QEMU_FLATTEN
2054 float32_mul(float32 a, float32 b, float_status *s)
2055 {
2056     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2057                         f32_is_zon2, f32_addsubmul_post);
2058 }
2059
2060 float64 QEMU_FLATTEN
2061 float64_mul(float64 a, float64 b, float_status *s)
2062 {
2063     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2064                         f64_is_zon2, f64_addsubmul_post);
2065 }
2066
2067 bfloat16 QEMU_FLATTEN
2068 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2069 {
2070     FloatParts64 pa, pb, *pr;
2071
2072     bfloat16_unpack_canonical(&pa, a, status);
2073     bfloat16_unpack_canonical(&pb, b, status);
2074     pr = parts_mul(&pa, &pb, status);
2075
2076     return bfloat16_round_pack_canonical(pr, status);
2077 }
2078
2079 float128 QEMU_FLATTEN
2080 float128_mul(float128 a, float128 b, float_status *status)
2081 {
2082     FloatParts128 pa, pb, *pr;
2083
2084     float128_unpack_canonical(&pa, a, status);
2085     float128_unpack_canonical(&pb, b, status);
2086     pr = parts_mul(&pa, &pb, status);
2087
2088     return float128_round_pack_canonical(pr, status);
2089 }
2090
2091 floatx80 QEMU_FLATTEN
2092 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2093 {
2094     FloatParts128 pa, pb, *pr;
2095
2096     if (!floatx80_unpack_canonical(&pa, a, status) ||
2097         !floatx80_unpack_canonical(&pb, b, status)) {
2098         return floatx80_default_nan(status);
2099     }
2100
2101     pr = parts_mul(&pa, &pb, status);
2102     return floatx80_round_pack_canonical(pr, status);
2103 }
2104
2105 /*
2106  * Fused multiply-add
2107  */
2108
2109 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2110                                     int flags, float_status *status)
2111 {
2112     FloatParts64 pa, pb, pc, *pr;
2113
2114     float16_unpack_canonical(&pa, a, status);
2115     float16_unpack_canonical(&pb, b, status);
2116     float16_unpack_canonical(&pc, c, status);
2117     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2118
2119     return float16_round_pack_canonical(pr, status);
2120 }
2121
2122 static float32 QEMU_SOFTFLOAT_ATTR
2123 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2124                 float_status *status)
2125 {
2126     FloatParts64 pa, pb, pc, *pr;
2127
2128     float32_unpack_canonical(&pa, a, status);
2129     float32_unpack_canonical(&pb, b, status);
2130     float32_unpack_canonical(&pc, c, status);
2131     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2132
2133     return float32_round_pack_canonical(pr, status);
2134 }
2135
2136 static float64 QEMU_SOFTFLOAT_ATTR
2137 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2138                 float_status *status)
2139 {
2140     FloatParts64 pa, pb, pc, *pr;
2141
2142     float64_unpack_canonical(&pa, a, status);
2143     float64_unpack_canonical(&pb, b, status);
2144     float64_unpack_canonical(&pc, c, status);
2145     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2146
2147     return float64_round_pack_canonical(pr, status);
2148 }
2149
2150 static bool force_soft_fma;
2151
2152 float32 QEMU_FLATTEN
2153 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2154 {
2155     union_float32 ua, ub, uc, ur;
2156
2157     ua.s = xa;
2158     ub.s = xb;
2159     uc.s = xc;
2160
2161     if (unlikely(!can_use_fpu(s))) {
2162         goto soft;
2163     }
2164     if (unlikely(flags & float_muladd_halve_result)) {
2165         goto soft;
2166     }
2167
2168     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2169     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2170         goto soft;
2171     }
2172
2173     if (unlikely(force_soft_fma)) {
2174         goto soft;
2175     }
2176
2177     /*
2178      * When (a || b) == 0, there's no need to check for under/over flow,
2179      * since we know the addend is (normal || 0) and the product is 0.
2180      */
2181     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2182         union_float32 up;
2183         bool prod_sign;
2184
2185         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2186         prod_sign ^= !!(flags & float_muladd_negate_product);
2187         up.s = float32_set_sign(float32_zero, prod_sign);
2188
2189         if (flags & float_muladd_negate_c) {
2190             uc.h = -uc.h;
2191         }
2192         ur.h = up.h + uc.h;
2193     } else {
2194         union_float32 ua_orig = ua;
2195         union_float32 uc_orig = uc;
2196
2197         if (flags & float_muladd_negate_product) {
2198             ua.h = -ua.h;
2199         }
2200         if (flags & float_muladd_negate_c) {
2201             uc.h = -uc.h;
2202         }
2203
2204         ur.h = fmaf(ua.h, ub.h, uc.h);
2205
2206         if (unlikely(f32_is_inf(ur))) {
2207             float_raise(float_flag_overflow, s);
2208         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2209             ua = ua_orig;
2210             uc = uc_orig;
2211             goto soft;
2212         }
2213     }
2214     if (flags & float_muladd_negate_result) {
2215         return float32_chs(ur.s);
2216     }
2217     return ur.s;
2218
2219  soft:
2220     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2221 }
2222
2223 float64 QEMU_FLATTEN
2224 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2225 {
2226     union_float64 ua, ub, uc, ur;
2227
2228     ua.s = xa;
2229     ub.s = xb;
2230     uc.s = xc;
2231
2232     if (unlikely(!can_use_fpu(s))) {
2233         goto soft;
2234     }
2235     if (unlikely(flags & float_muladd_halve_result)) {
2236         goto soft;
2237     }
2238
2239     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2240     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2241         goto soft;
2242     }
2243
2244     if (unlikely(force_soft_fma)) {
2245         goto soft;
2246     }
2247
2248     /*
2249      * When (a || b) == 0, there's no need to check for under/over flow,
2250      * since we know the addend is (normal || 0) and the product is 0.
2251      */
2252     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2253         union_float64 up;
2254         bool prod_sign;
2255
2256         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2257         prod_sign ^= !!(flags & float_muladd_negate_product);
2258         up.s = float64_set_sign(float64_zero, prod_sign);
2259
2260         if (flags & float_muladd_negate_c) {
2261             uc.h = -uc.h;
2262         }
2263         ur.h = up.h + uc.h;
2264     } else {
2265         union_float64 ua_orig = ua;
2266         union_float64 uc_orig = uc;
2267
2268         if (flags & float_muladd_negate_product) {
2269             ua.h = -ua.h;
2270         }
2271         if (flags & float_muladd_negate_c) {
2272             uc.h = -uc.h;
2273         }
2274
2275         ur.h = fma(ua.h, ub.h, uc.h);
2276
2277         if (unlikely(f64_is_inf(ur))) {
2278             float_raise(float_flag_overflow, s);
2279         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2280             ua = ua_orig;
2281             uc = uc_orig;
2282             goto soft;
2283         }
2284     }
2285     if (flags & float_muladd_negate_result) {
2286         return float64_chs(ur.s);
2287     }
2288     return ur.s;
2289
2290  soft:
2291     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2292 }
2293
2294 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2295                                       int flags, float_status *status)
2296 {
2297     FloatParts64 pa, pb, pc, *pr;
2298
2299     bfloat16_unpack_canonical(&pa, a, status);
2300     bfloat16_unpack_canonical(&pb, b, status);
2301     bfloat16_unpack_canonical(&pc, c, status);
2302     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2303
2304     return bfloat16_round_pack_canonical(pr, status);
2305 }
2306
2307 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2308                                       int flags, float_status *status)
2309 {
2310     FloatParts128 pa, pb, pc, *pr;
2311
2312     float128_unpack_canonical(&pa, a, status);
2313     float128_unpack_canonical(&pb, b, status);
2314     float128_unpack_canonical(&pc, c, status);
2315     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2316
2317     return float128_round_pack_canonical(pr, status);
2318 }
2319
2320 /*
2321  * Division
2322  */
2323
2324 float16 float16_div(float16 a, float16 b, float_status *status)
2325 {
2326     FloatParts64 pa, pb, *pr;
2327
2328     float16_unpack_canonical(&pa, a, status);
2329     float16_unpack_canonical(&pb, b, status);
2330     pr = parts_div(&pa, &pb, status);
2331
2332     return float16_round_pack_canonical(pr, status);
2333 }
2334
2335 static float32 QEMU_SOFTFLOAT_ATTR
2336 soft_f32_div(float32 a, float32 b, float_status *status)
2337 {
2338     FloatParts64 pa, pb, *pr;
2339
2340     float32_unpack_canonical(&pa, a, status);
2341     float32_unpack_canonical(&pb, b, status);
2342     pr = parts_div(&pa, &pb, status);
2343
2344     return float32_round_pack_canonical(pr, status);
2345 }
2346
2347 static float64 QEMU_SOFTFLOAT_ATTR
2348 soft_f64_div(float64 a, float64 b, float_status *status)
2349 {
2350     FloatParts64 pa, pb, *pr;
2351
2352     float64_unpack_canonical(&pa, a, status);
2353     float64_unpack_canonical(&pb, b, status);
2354     pr = parts_div(&pa, &pb, status);
2355
2356     return float64_round_pack_canonical(pr, status);
2357 }
2358
2359 static float hard_f32_div(float a, float b)
2360 {
2361     return a / b;
2362 }
2363
2364 static double hard_f64_div(double a, double b)
2365 {
2366     return a / b;
2367 }
2368
2369 static bool f32_div_pre(union_float32 a, union_float32 b)
2370 {
2371     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2372         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2373                fpclassify(b.h) == FP_NORMAL;
2374     }
2375     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2376 }
2377
2378 static bool f64_div_pre(union_float64 a, union_float64 b)
2379 {
2380     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2381         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2382                fpclassify(b.h) == FP_NORMAL;
2383     }
2384     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2385 }
2386
2387 static bool f32_div_post(union_float32 a, union_float32 b)
2388 {
2389     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2390         return fpclassify(a.h) != FP_ZERO;
2391     }
2392     return !float32_is_zero(a.s);
2393 }
2394
2395 static bool f64_div_post(union_float64 a, union_float64 b)
2396 {
2397     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2398         return fpclassify(a.h) != FP_ZERO;
2399     }
2400     return !float64_is_zero(a.s);
2401 }
2402
2403 float32 QEMU_FLATTEN
2404 float32_div(float32 a, float32 b, float_status *s)
2405 {
2406     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2407                         f32_div_pre, f32_div_post);
2408 }
2409
2410 float64 QEMU_FLATTEN
2411 float64_div(float64 a, float64 b, float_status *s)
2412 {
2413     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2414                         f64_div_pre, f64_div_post);
2415 }
2416
2417 bfloat16 QEMU_FLATTEN
2418 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2419 {
2420     FloatParts64 pa, pb, *pr;
2421
2422     bfloat16_unpack_canonical(&pa, a, status);
2423     bfloat16_unpack_canonical(&pb, b, status);
2424     pr = parts_div(&pa, &pb, status);
2425
2426     return bfloat16_round_pack_canonical(pr, status);
2427 }
2428
2429 float128 QEMU_FLATTEN
2430 float128_div(float128 a, float128 b, float_status *status)
2431 {
2432     FloatParts128 pa, pb, *pr;
2433
2434     float128_unpack_canonical(&pa, a, status);
2435     float128_unpack_canonical(&pb, b, status);
2436     pr = parts_div(&pa, &pb, status);
2437
2438     return float128_round_pack_canonical(pr, status);
2439 }
2440
2441 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2442 {
2443     FloatParts128 pa, pb, *pr;
2444
2445     if (!floatx80_unpack_canonical(&pa, a, status) ||
2446         !floatx80_unpack_canonical(&pb, b, status)) {
2447         return floatx80_default_nan(status);
2448     }
2449
2450     pr = parts_div(&pa, &pb, status);
2451     return floatx80_round_pack_canonical(pr, status);
2452 }
2453
2454 /*
2455  * Remainder
2456  */
2457
2458 float32 float32_rem(float32 a, float32 b, float_status *status)
2459 {
2460     FloatParts64 pa, pb, *pr;
2461
2462     float32_unpack_canonical(&pa, a, status);
2463     float32_unpack_canonical(&pb, b, status);
2464     pr = parts_modrem(&pa, &pb, NULL, status);
2465
2466     return float32_round_pack_canonical(pr, status);
2467 }
2468
2469 float64 float64_rem(float64 a, float64 b, float_status *status)
2470 {
2471     FloatParts64 pa, pb, *pr;
2472
2473     float64_unpack_canonical(&pa, a, status);
2474     float64_unpack_canonical(&pb, b, status);
2475     pr = parts_modrem(&pa, &pb, NULL, status);
2476
2477     return float64_round_pack_canonical(pr, status);
2478 }
2479
2480 float128 float128_rem(float128 a, float128 b, float_status *status)
2481 {
2482     FloatParts128 pa, pb, *pr;
2483
2484     float128_unpack_canonical(&pa, a, status);
2485     float128_unpack_canonical(&pb, b, status);
2486     pr = parts_modrem(&pa, &pb, NULL, status);
2487
2488     return float128_round_pack_canonical(pr, status);
2489 }
2490
2491 /*
2492  * Returns the remainder of the extended double-precision floating-point value
2493  * `a' with respect to the corresponding value `b'.
2494  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2495  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2496  * the remainder based on truncating the quotient toward zero instead and
2497  * *quotient is set to the low 64 bits of the absolute value of the integer
2498  * quotient.
2499  */
2500 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2501                          uint64_t *quotient, float_status *status)
2502 {
2503     FloatParts128 pa, pb, *pr;
2504
2505     *quotient = 0;
2506     if (!floatx80_unpack_canonical(&pa, a, status) ||
2507         !floatx80_unpack_canonical(&pb, b, status)) {
2508         return floatx80_default_nan(status);
2509     }
2510     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2511
2512     return floatx80_round_pack_canonical(pr, status);
2513 }
2514
2515 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2516 {
2517     uint64_t quotient;
2518     return floatx80_modrem(a, b, false, &quotient, status);
2519 }
2520
2521 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2522 {
2523     uint64_t quotient;
2524     return floatx80_modrem(a, b, true, &quotient, status);
2525 }
2526
2527 /*
2528  * Float to Float conversions
2529  *
2530  * Returns the result of converting one float format to another. The
2531  * conversion is performed according to the IEC/IEEE Standard for
2532  * Binary Floating-Point Arithmetic.
2533  *
2534  * Usually this only needs to take care of raising invalid exceptions
2535  * and handling the conversion on NaNs.
2536  */
2537
2538 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2539 {
2540     switch (a->cls) {
2541     case float_class_qnan:
2542     case float_class_snan:
2543         /*
2544          * There is no NaN in the destination format.  Raise Invalid
2545          * and return a zero with the sign of the input NaN.
2546          */
2547         float_raise(float_flag_invalid, s);
2548         a->cls = float_class_zero;
2549         break;
2550
2551     case float_class_inf:
2552         /*
2553          * There is no Inf in the destination format.  Raise Invalid
2554          * and return the maximum normal with the correct sign.
2555          */
2556         float_raise(float_flag_invalid, s);
2557         a->cls = float_class_normal;
2558         a->exp = float16_params_ahp.exp_max;
2559         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2560                                   float16_params_ahp.frac_size + 1);
2561         break;
2562
2563     case float_class_normal:
2564     case float_class_zero:
2565         break;
2566
2567     default:
2568         g_assert_not_reached();
2569     }
2570 }
2571
2572 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2573 {
2574     if (is_nan(a->cls)) {
2575         parts_return_nan(a, s);
2576     }
2577 }
2578
2579 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2580 {
2581     if (is_nan(a->cls)) {
2582         parts_return_nan(a, s);
2583     }
2584 }
2585
2586 #define parts_float_to_float(P, S) \
2587     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2588
2589 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2590                                         float_status *s)
2591 {
2592     a->cls = b->cls;
2593     a->sign = b->sign;
2594     a->exp = b->exp;
2595
2596     if (a->cls == float_class_normal) {
2597         frac_truncjam(a, b);
2598     } else if (is_nan(a->cls)) {
2599         /* Discard the low bits of the NaN. */
2600         a->frac = b->frac_hi;
2601         parts_return_nan(a, s);
2602     }
2603 }
2604
2605 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2606                                        float_status *s)
2607 {
2608     a->cls = b->cls;
2609     a->sign = b->sign;
2610     a->exp = b->exp;
2611     frac_widen(a, b);
2612
2613     if (is_nan(a->cls)) {
2614         parts_return_nan(a, s);
2615     }
2616 }
2617
2618 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2619 {
2620     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2621     FloatParts64 p;
2622
2623     float16a_unpack_canonical(&p, a, s, fmt16);
2624     parts_float_to_float(&p, s);
2625     return float32_round_pack_canonical(&p, s);
2626 }
2627
2628 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2629 {
2630     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2631     FloatParts64 p;
2632
2633     float16a_unpack_canonical(&p, a, s, fmt16);
2634     parts_float_to_float(&p, s);
2635     return float64_round_pack_canonical(&p, s);
2636 }
2637
2638 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2639 {
2640     FloatParts64 p;
2641     const FloatFmt *fmt;
2642
2643     float32_unpack_canonical(&p, a, s);
2644     if (ieee) {
2645         parts_float_to_float(&p, s);
2646         fmt = &float16_params;
2647     } else {
2648         parts_float_to_ahp(&p, s);
2649         fmt = &float16_params_ahp;
2650     }
2651     return float16a_round_pack_canonical(&p, s, fmt);
2652 }
2653
2654 static float64 QEMU_SOFTFLOAT_ATTR
2655 soft_float32_to_float64(float32 a, float_status *s)
2656 {
2657     FloatParts64 p;
2658
2659     float32_unpack_canonical(&p, a, s);
2660     parts_float_to_float(&p, s);
2661     return float64_round_pack_canonical(&p, s);
2662 }
2663
2664 float64 float32_to_float64(float32 a, float_status *s)
2665 {
2666     if (likely(float32_is_normal(a))) {
2667         /* Widening conversion can never produce inexact results.  */
2668         union_float32 uf;
2669         union_float64 ud;
2670         uf.s = a;
2671         ud.h = uf.h;
2672         return ud.s;
2673     } else if (float32_is_zero(a)) {
2674         return float64_set_sign(float64_zero, float32_is_neg(a));
2675     } else {
2676         return soft_float32_to_float64(a, s);
2677     }
2678 }
2679
2680 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2681 {
2682     FloatParts64 p;
2683     const FloatFmt *fmt;
2684
2685     float64_unpack_canonical(&p, a, s);
2686     if (ieee) {
2687         parts_float_to_float(&p, s);
2688         fmt = &float16_params;
2689     } else {
2690         parts_float_to_ahp(&p, s);
2691         fmt = &float16_params_ahp;
2692     }
2693     return float16a_round_pack_canonical(&p, s, fmt);
2694 }
2695
2696 float32 float64_to_float32(float64 a, float_status *s)
2697 {
2698     FloatParts64 p;
2699
2700     float64_unpack_canonical(&p, a, s);
2701     parts_float_to_float(&p, s);
2702     return float32_round_pack_canonical(&p, s);
2703 }
2704
2705 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2706 {
2707     FloatParts64 p;
2708
2709     bfloat16_unpack_canonical(&p, a, s);
2710     parts_float_to_float(&p, s);
2711     return float32_round_pack_canonical(&p, s);
2712 }
2713
2714 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2715 {
2716     FloatParts64 p;
2717
2718     bfloat16_unpack_canonical(&p, a, s);
2719     parts_float_to_float(&p, s);
2720     return float64_round_pack_canonical(&p, s);
2721 }
2722
2723 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2724 {
2725     FloatParts64 p;
2726
2727     float32_unpack_canonical(&p, a, s);
2728     parts_float_to_float(&p, s);
2729     return bfloat16_round_pack_canonical(&p, s);
2730 }
2731
2732 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2733 {
2734     FloatParts64 p;
2735
2736     float64_unpack_canonical(&p, a, s);
2737     parts_float_to_float(&p, s);
2738     return bfloat16_round_pack_canonical(&p, s);
2739 }
2740
2741 float32 float128_to_float32(float128 a, float_status *s)
2742 {
2743     FloatParts64 p64;
2744     FloatParts128 p128;
2745
2746     float128_unpack_canonical(&p128, a, s);
2747     parts_float_to_float_narrow(&p64, &p128, s);
2748     return float32_round_pack_canonical(&p64, s);
2749 }
2750
2751 float64 float128_to_float64(float128 a, float_status *s)
2752 {
2753     FloatParts64 p64;
2754     FloatParts128 p128;
2755
2756     float128_unpack_canonical(&p128, a, s);
2757     parts_float_to_float_narrow(&p64, &p128, s);
2758     return float64_round_pack_canonical(&p64, s);
2759 }
2760
2761 float128 float32_to_float128(float32 a, float_status *s)
2762 {
2763     FloatParts64 p64;
2764     FloatParts128 p128;
2765
2766     float32_unpack_canonical(&p64, a, s);
2767     parts_float_to_float_widen(&p128, &p64, s);
2768     return float128_round_pack_canonical(&p128, s);
2769 }
2770
2771 float128 float64_to_float128(float64 a, float_status *s)
2772 {
2773     FloatParts64 p64;
2774     FloatParts128 p128;
2775
2776     float64_unpack_canonical(&p64, a, s);
2777     parts_float_to_float_widen(&p128, &p64, s);
2778     return float128_round_pack_canonical(&p128, s);
2779 }
2780
2781 float32 floatx80_to_float32(floatx80 a, float_status *s)
2782 {
2783     FloatParts64 p64;
2784     FloatParts128 p128;
2785
2786     if (floatx80_unpack_canonical(&p128, a, s)) {
2787         parts_float_to_float_narrow(&p64, &p128, s);
2788     } else {
2789         parts_default_nan(&p64, s);
2790     }
2791     return float32_round_pack_canonical(&p64, s);
2792 }
2793
2794 float64 floatx80_to_float64(floatx80 a, float_status *s)
2795 {
2796     FloatParts64 p64;
2797     FloatParts128 p128;
2798
2799     if (floatx80_unpack_canonical(&p128, a, s)) {
2800         parts_float_to_float_narrow(&p64, &p128, s);
2801     } else {
2802         parts_default_nan(&p64, s);
2803     }
2804     return float64_round_pack_canonical(&p64, s);
2805 }
2806
2807 float128 floatx80_to_float128(floatx80 a, float_status *s)
2808 {
2809     FloatParts128 p;
2810
2811     if (floatx80_unpack_canonical(&p, a, s)) {
2812         parts_float_to_float(&p, s);
2813     } else {
2814         parts_default_nan(&p, s);
2815     }
2816     return float128_round_pack_canonical(&p, s);
2817 }
2818
2819 floatx80 float32_to_floatx80(float32 a, float_status *s)
2820 {
2821     FloatParts64 p64;
2822     FloatParts128 p128;
2823
2824     float32_unpack_canonical(&p64, a, s);
2825     parts_float_to_float_widen(&p128, &p64, s);
2826     return floatx80_round_pack_canonical(&p128, s);
2827 }
2828
2829 floatx80 float64_to_floatx80(float64 a, float_status *s)
2830 {
2831     FloatParts64 p64;
2832     FloatParts128 p128;
2833
2834     float64_unpack_canonical(&p64, a, s);
2835     parts_float_to_float_widen(&p128, &p64, s);
2836     return floatx80_round_pack_canonical(&p128, s);
2837 }
2838
2839 floatx80 float128_to_floatx80(float128 a, float_status *s)
2840 {
2841     FloatParts128 p;
2842
2843     float128_unpack_canonical(&p, a, s);
2844     parts_float_to_float(&p, s);
2845     return floatx80_round_pack_canonical(&p, s);
2846 }
2847
2848 /*
2849  * Round to integral value
2850  */
2851
2852 float16 float16_round_to_int(float16 a, float_status *s)
2853 {
2854     FloatParts64 p;
2855
2856     float16_unpack_canonical(&p, a, s);
2857     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2858     return float16_round_pack_canonical(&p, s);
2859 }
2860
2861 float32 float32_round_to_int(float32 a, float_status *s)
2862 {
2863     FloatParts64 p;
2864
2865     float32_unpack_canonical(&p, a, s);
2866     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2867     return float32_round_pack_canonical(&p, s);
2868 }
2869
2870 float64 float64_round_to_int(float64 a, float_status *s)
2871 {
2872     FloatParts64 p;
2873
2874     float64_unpack_canonical(&p, a, s);
2875     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2876     return float64_round_pack_canonical(&p, s);
2877 }
2878
2879 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2880 {
2881     FloatParts64 p;
2882
2883     bfloat16_unpack_canonical(&p, a, s);
2884     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2885     return bfloat16_round_pack_canonical(&p, s);
2886 }
2887
2888 float128 float128_round_to_int(float128 a, float_status *s)
2889 {
2890     FloatParts128 p;
2891
2892     float128_unpack_canonical(&p, a, s);
2893     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2894     return float128_round_pack_canonical(&p, s);
2895 }
2896
2897 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2898 {
2899     FloatParts128 p;
2900
2901     if (!floatx80_unpack_canonical(&p, a, status)) {
2902         return floatx80_default_nan(status);
2903     }
2904
2905     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
2906                        &floatx80_params[status->floatx80_rounding_precision]);
2907     return floatx80_round_pack_canonical(&p, status);
2908 }
2909
2910 /*
2911  * Floating-point to signed integer conversions
2912  */
2913
2914 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2915                               float_status *s)
2916 {
2917     FloatParts64 p;
2918
2919     float16_unpack_canonical(&p, a, s);
2920     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2921 }
2922
2923 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2924                                 float_status *s)
2925 {
2926     FloatParts64 p;
2927
2928     float16_unpack_canonical(&p, a, s);
2929     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2930 }
2931
2932 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2933                                 float_status *s)
2934 {
2935     FloatParts64 p;
2936
2937     float16_unpack_canonical(&p, a, s);
2938     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2939 }
2940
2941 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2942                                 float_status *s)
2943 {
2944     FloatParts64 p;
2945
2946     float16_unpack_canonical(&p, a, s);
2947     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2948 }
2949
2950 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2951                                 float_status *s)
2952 {
2953     FloatParts64 p;
2954
2955     float32_unpack_canonical(&p, a, s);
2956     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2957 }
2958
2959 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2960                                 float_status *s)
2961 {
2962     FloatParts64 p;
2963
2964     float32_unpack_canonical(&p, a, s);
2965     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2966 }
2967
2968 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2969                                 float_status *s)
2970 {
2971     FloatParts64 p;
2972
2973     float32_unpack_canonical(&p, a, s);
2974     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2975 }
2976
2977 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2978                                 float_status *s)
2979 {
2980     FloatParts64 p;
2981
2982     float64_unpack_canonical(&p, a, s);
2983     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2984 }
2985
2986 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2987                                 float_status *s)
2988 {
2989     FloatParts64 p;
2990
2991     float64_unpack_canonical(&p, a, s);
2992     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2993 }
2994
2995 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2996                                 float_status *s)
2997 {
2998     FloatParts64 p;
2999
3000     float64_unpack_canonical(&p, a, s);
3001     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3002 }
3003
3004 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3005                                  float_status *s)
3006 {
3007     FloatParts64 p;
3008
3009     bfloat16_unpack_canonical(&p, a, s);
3010     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3011 }
3012
3013 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3014                                  float_status *s)
3015 {
3016     FloatParts64 p;
3017
3018     bfloat16_unpack_canonical(&p, a, s);
3019     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3020 }
3021
3022 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3023                                  float_status *s)
3024 {
3025     FloatParts64 p;
3026
3027     bfloat16_unpack_canonical(&p, a, s);
3028     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3029 }
3030
3031 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3032                                         int scale, float_status *s)
3033 {
3034     FloatParts128 p;
3035
3036     float128_unpack_canonical(&p, a, s);
3037     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3038 }
3039
3040 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3041                                         int scale, float_status *s)
3042 {
3043     FloatParts128 p;
3044
3045     float128_unpack_canonical(&p, a, s);
3046     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3047 }
3048
3049 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3050                                         int scale, float_status *s)
3051 {
3052     FloatParts128 p;
3053
3054     if (!floatx80_unpack_canonical(&p, a, s)) {
3055         parts_default_nan(&p, s);
3056     }
3057     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3058 }
3059
3060 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3061                                         int scale, float_status *s)
3062 {
3063     FloatParts128 p;
3064
3065     if (!floatx80_unpack_canonical(&p, a, s)) {
3066         parts_default_nan(&p, s);
3067     }
3068     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3069 }
3070
3071 int8_t float16_to_int8(float16 a, float_status *s)
3072 {
3073     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3074 }
3075
3076 int16_t float16_to_int16(float16 a, float_status *s)
3077 {
3078     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3079 }
3080
3081 int32_t float16_to_int32(float16 a, float_status *s)
3082 {
3083     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3084 }
3085
3086 int64_t float16_to_int64(float16 a, float_status *s)
3087 {
3088     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3089 }
3090
3091 int16_t float32_to_int16(float32 a, float_status *s)
3092 {
3093     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3094 }
3095
3096 int32_t float32_to_int32(float32 a, float_status *s)
3097 {
3098     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3099 }
3100
3101 int64_t float32_to_int64(float32 a, float_status *s)
3102 {
3103     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3104 }
3105
3106 int16_t float64_to_int16(float64 a, float_status *s)
3107 {
3108     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3109 }
3110
3111 int32_t float64_to_int32(float64 a, float_status *s)
3112 {
3113     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3114 }
3115
3116 int64_t float64_to_int64(float64 a, float_status *s)
3117 {
3118     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3119 }
3120
3121 int32_t float128_to_int32(float128 a, float_status *s)
3122 {
3123     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3124 }
3125
3126 int64_t float128_to_int64(float128 a, float_status *s)
3127 {
3128     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3129 }
3130
3131 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3132 {
3133     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3134 }
3135
3136 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3137 {
3138     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3139 }
3140
3141 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3142 {
3143     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3144 }
3145
3146 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3147 {
3148     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3149 }
3150
3151 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3152 {
3153     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3154 }
3155
3156 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3157 {
3158     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3159 }
3160
3161 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3162 {
3163     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3164 }
3165
3166 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3167 {
3168     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3169 }
3170
3171 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3172 {
3173     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3174 }
3175
3176 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3177 {
3178     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3179 }
3180
3181 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3182 {
3183     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3184 }
3185
3186 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3187 {
3188     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3189 }
3190
3191 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3192 {
3193     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3194 }
3195
3196 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3197 {
3198     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3199 }
3200
3201 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3202 {
3203     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3204 }
3205
3206 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3207 {
3208     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3209 }
3210
3211 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3212 {
3213     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3214 }
3215
3216 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3217 {
3218     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3219 }
3220
3221 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3222 {
3223     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3224 }
3225
3226 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3227 {
3228     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3229 }
3230
3231 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3232 {
3233     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3234 }
3235
3236 /*
3237  * Floating-point to unsigned integer conversions
3238  */
3239
3240 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3241                                 float_status *s)
3242 {
3243     FloatParts64 p;
3244
3245     float16_unpack_canonical(&p, a, s);
3246     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3247 }
3248
3249 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3250                                   float_status *s)
3251 {
3252     FloatParts64 p;
3253
3254     float16_unpack_canonical(&p, a, s);
3255     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3256 }
3257
3258 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3259                                   float_status *s)
3260 {
3261     FloatParts64 p;
3262
3263     float16_unpack_canonical(&p, a, s);
3264     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3265 }
3266
3267 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3268                                   float_status *s)
3269 {
3270     FloatParts64 p;
3271
3272     float16_unpack_canonical(&p, a, s);
3273     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3274 }
3275
3276 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3277                                   float_status *s)
3278 {
3279     FloatParts64 p;
3280
3281     float32_unpack_canonical(&p, a, s);
3282     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3283 }
3284
3285 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3286                                   float_status *s)
3287 {
3288     FloatParts64 p;
3289
3290     float32_unpack_canonical(&p, a, s);
3291     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3292 }
3293
3294 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3295                                   float_status *s)
3296 {
3297     FloatParts64 p;
3298
3299     float32_unpack_canonical(&p, a, s);
3300     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3301 }
3302
3303 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3304                                   float_status *s)
3305 {
3306     FloatParts64 p;
3307
3308     float64_unpack_canonical(&p, a, s);
3309     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3310 }
3311
3312 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3313                                   float_status *s)
3314 {
3315     FloatParts64 p;
3316
3317     float64_unpack_canonical(&p, a, s);
3318     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3319 }
3320
3321 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3322                                   float_status *s)
3323 {
3324     FloatParts64 p;
3325
3326     float64_unpack_canonical(&p, a, s);
3327     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3328 }
3329
3330 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3331                                    int scale, float_status *s)
3332 {
3333     FloatParts64 p;
3334
3335     bfloat16_unpack_canonical(&p, a, s);
3336     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3337 }
3338
3339 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3340                                    int scale, float_status *s)
3341 {
3342     FloatParts64 p;
3343
3344     bfloat16_unpack_canonical(&p, a, s);
3345     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3346 }
3347
3348 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3349                                    int scale, float_status *s)
3350 {
3351     FloatParts64 p;
3352
3353     bfloat16_unpack_canonical(&p, a, s);
3354     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3355 }
3356
3357 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3358                                           int scale, float_status *s)
3359 {
3360     FloatParts128 p;
3361
3362     float128_unpack_canonical(&p, a, s);
3363     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3364 }
3365
3366 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3367                                           int scale, float_status *s)
3368 {
3369     FloatParts128 p;
3370
3371     float128_unpack_canonical(&p, a, s);
3372     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3373 }
3374
3375 uint8_t float16_to_uint8(float16 a, float_status *s)
3376 {
3377     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3378 }
3379
3380 uint16_t float16_to_uint16(float16 a, float_status *s)
3381 {
3382     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3383 }
3384
3385 uint32_t float16_to_uint32(float16 a, float_status *s)
3386 {
3387     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3388 }
3389
3390 uint64_t float16_to_uint64(float16 a, float_status *s)
3391 {
3392     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3393 }
3394
3395 uint16_t float32_to_uint16(float32 a, float_status *s)
3396 {
3397     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3398 }
3399
3400 uint32_t float32_to_uint32(float32 a, float_status *s)
3401 {
3402     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3403 }
3404
3405 uint64_t float32_to_uint64(float32 a, float_status *s)
3406 {
3407     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3408 }
3409
3410 uint16_t float64_to_uint16(float64 a, float_status *s)
3411 {
3412     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3413 }
3414
3415 uint32_t float64_to_uint32(float64 a, float_status *s)
3416 {
3417     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3418 }
3419
3420 uint64_t float64_to_uint64(float64 a, float_status *s)
3421 {
3422     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3423 }
3424
3425 uint32_t float128_to_uint32(float128 a, float_status *s)
3426 {
3427     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3428 }
3429
3430 uint64_t float128_to_uint64(float128 a, float_status *s)
3431 {
3432     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3433 }
3434
3435 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3436 {
3437     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3438 }
3439
3440 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3441 {
3442     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3443 }
3444
3445 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3446 {
3447     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3448 }
3449
3450 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3451 {
3452     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3453 }
3454
3455 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3456 {
3457     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3458 }
3459
3460 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3461 {
3462     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3463 }
3464
3465 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3466 {
3467     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3468 }
3469
3470 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3471 {
3472     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3473 }
3474
3475 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3476 {
3477     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3478 }
3479
3480 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3481 {
3482     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3483 }
3484
3485 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3486 {
3487     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3488 }
3489
3490 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3491 {
3492     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3493 }
3494
3495 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3496 {
3497     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3498 }
3499
3500 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3501 {
3502     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3503 }
3504
3505 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3506 {
3507     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3508 }
3509
3510 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3511 {
3512     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3513 }
3514
3515 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3516 {
3517     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3518 }
3519
3520 /*
3521  * Signed integer to floating-point conversions
3522  */
3523
3524 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3525 {
3526     FloatParts64 p;
3527
3528     parts_sint_to_float(&p, a, scale, status);
3529     return float16_round_pack_canonical(&p, status);
3530 }
3531
3532 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3533 {
3534     return int64_to_float16_scalbn(a, scale, status);
3535 }
3536
3537 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3538 {
3539     return int64_to_float16_scalbn(a, scale, status);
3540 }
3541
3542 float16 int64_to_float16(int64_t a, float_status *status)
3543 {
3544     return int64_to_float16_scalbn(a, 0, status);
3545 }
3546
3547 float16 int32_to_float16(int32_t a, float_status *status)
3548 {
3549     return int64_to_float16_scalbn(a, 0, status);
3550 }
3551
3552 float16 int16_to_float16(int16_t a, float_status *status)
3553 {
3554     return int64_to_float16_scalbn(a, 0, status);
3555 }
3556
3557 float16 int8_to_float16(int8_t a, float_status *status)
3558 {
3559     return int64_to_float16_scalbn(a, 0, status);
3560 }
3561
3562 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3563 {
3564     FloatParts64 p;
3565
3566     /* Without scaling, there are no overflow concerns. */
3567     if (likely(scale == 0) && can_use_fpu(status)) {
3568         union_float32 ur;
3569         ur.h = a;
3570         return ur.s;
3571     }
3572
3573     parts64_sint_to_float(&p, a, scale, status);
3574     return float32_round_pack_canonical(&p, status);
3575 }
3576
3577 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3578 {
3579     return int64_to_float32_scalbn(a, scale, status);
3580 }
3581
3582 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3583 {
3584     return int64_to_float32_scalbn(a, scale, status);
3585 }
3586
3587 float32 int64_to_float32(int64_t a, float_status *status)
3588 {
3589     return int64_to_float32_scalbn(a, 0, status);
3590 }
3591
3592 float32 int32_to_float32(int32_t a, float_status *status)
3593 {
3594     return int64_to_float32_scalbn(a, 0, status);
3595 }
3596
3597 float32 int16_to_float32(int16_t a, float_status *status)
3598 {
3599     return int64_to_float32_scalbn(a, 0, status);
3600 }
3601
3602 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3603 {
3604     FloatParts64 p;
3605
3606     /* Without scaling, there are no overflow concerns. */
3607     if (likely(scale == 0) && can_use_fpu(status)) {
3608         union_float64 ur;
3609         ur.h = a;
3610         return ur.s;
3611     }
3612
3613     parts_sint_to_float(&p, a, scale, status);
3614     return float64_round_pack_canonical(&p, status);
3615 }
3616
3617 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3618 {
3619     return int64_to_float64_scalbn(a, scale, status);
3620 }
3621
3622 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3623 {
3624     return int64_to_float64_scalbn(a, scale, status);
3625 }
3626
3627 float64 int64_to_float64(int64_t a, float_status *status)
3628 {
3629     return int64_to_float64_scalbn(a, 0, status);
3630 }
3631
3632 float64 int32_to_float64(int32_t a, float_status *status)
3633 {
3634     return int64_to_float64_scalbn(a, 0, status);
3635 }
3636
3637 float64 int16_to_float64(int16_t a, float_status *status)
3638 {
3639     return int64_to_float64_scalbn(a, 0, status);
3640 }
3641
3642 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3643 {
3644     FloatParts64 p;
3645
3646     parts_sint_to_float(&p, a, scale, status);
3647     return bfloat16_round_pack_canonical(&p, status);
3648 }
3649
3650 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3651 {
3652     return int64_to_bfloat16_scalbn(a, scale, status);
3653 }
3654
3655 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3656 {
3657     return int64_to_bfloat16_scalbn(a, scale, status);
3658 }
3659
3660 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3661 {
3662     return int64_to_bfloat16_scalbn(a, 0, status);
3663 }
3664
3665 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3666 {
3667     return int64_to_bfloat16_scalbn(a, 0, status);
3668 }
3669
3670 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3671 {
3672     return int64_to_bfloat16_scalbn(a, 0, status);
3673 }
3674
3675 float128 int64_to_float128(int64_t a, float_status *status)
3676 {
3677     FloatParts128 p;
3678
3679     parts_sint_to_float(&p, a, 0, status);
3680     return float128_round_pack_canonical(&p, status);
3681 }
3682
3683 float128 int32_to_float128(int32_t a, float_status *status)
3684 {
3685     return int64_to_float128(a, status);
3686 }
3687
3688 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3689 {
3690     FloatParts128 p;
3691
3692     parts_sint_to_float(&p, a, 0, status);
3693     return floatx80_round_pack_canonical(&p, status);
3694 }
3695
3696 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3697 {
3698     return int64_to_floatx80(a, status);
3699 }
3700
3701 /*
3702  * Unsigned Integer to floating-point conversions
3703  */
3704
3705 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3706 {
3707     FloatParts64 p;
3708
3709     parts_uint_to_float(&p, a, scale, status);
3710     return float16_round_pack_canonical(&p, status);
3711 }
3712
3713 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3714 {
3715     return uint64_to_float16_scalbn(a, scale, status);
3716 }
3717
3718 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3719 {
3720     return uint64_to_float16_scalbn(a, scale, status);
3721 }
3722
3723 float16 uint64_to_float16(uint64_t a, float_status *status)
3724 {
3725     return uint64_to_float16_scalbn(a, 0, status);
3726 }
3727
3728 float16 uint32_to_float16(uint32_t a, float_status *status)
3729 {
3730     return uint64_to_float16_scalbn(a, 0, status);
3731 }
3732
3733 float16 uint16_to_float16(uint16_t a, float_status *status)
3734 {
3735     return uint64_to_float16_scalbn(a, 0, status);
3736 }
3737
3738 float16 uint8_to_float16(uint8_t a, float_status *status)
3739 {
3740     return uint64_to_float16_scalbn(a, 0, status);
3741 }
3742
3743 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3744 {
3745     FloatParts64 p;
3746
3747     /* Without scaling, there are no overflow concerns. */
3748     if (likely(scale == 0) && can_use_fpu(status)) {
3749         union_float32 ur;
3750         ur.h = a;
3751         return ur.s;
3752     }
3753
3754     parts_uint_to_float(&p, a, scale, status);
3755     return float32_round_pack_canonical(&p, status);
3756 }
3757
3758 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3759 {
3760     return uint64_to_float32_scalbn(a, scale, status);
3761 }
3762
3763 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3764 {
3765     return uint64_to_float32_scalbn(a, scale, status);
3766 }
3767
3768 float32 uint64_to_float32(uint64_t a, float_status *status)
3769 {
3770     return uint64_to_float32_scalbn(a, 0, status);
3771 }
3772
3773 float32 uint32_to_float32(uint32_t a, float_status *status)
3774 {
3775     return uint64_to_float32_scalbn(a, 0, status);
3776 }
3777
3778 float32 uint16_to_float32(uint16_t a, float_status *status)
3779 {
3780     return uint64_to_float32_scalbn(a, 0, status);
3781 }
3782
3783 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3784 {
3785     FloatParts64 p;
3786
3787     /* Without scaling, there are no overflow concerns. */
3788     if (likely(scale == 0) && can_use_fpu(status)) {
3789         union_float64 ur;
3790         ur.h = a;
3791         return ur.s;
3792     }
3793
3794     parts_uint_to_float(&p, a, scale, status);
3795     return float64_round_pack_canonical(&p, status);
3796 }
3797
3798 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3799 {
3800     return uint64_to_float64_scalbn(a, scale, status);
3801 }
3802
3803 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3804 {
3805     return uint64_to_float64_scalbn(a, scale, status);
3806 }
3807
3808 float64 uint64_to_float64(uint64_t a, float_status *status)
3809 {
3810     return uint64_to_float64_scalbn(a, 0, status);
3811 }
3812
3813 float64 uint32_to_float64(uint32_t a, float_status *status)
3814 {
3815     return uint64_to_float64_scalbn(a, 0, status);
3816 }
3817
3818 float64 uint16_to_float64(uint16_t a, float_status *status)
3819 {
3820     return uint64_to_float64_scalbn(a, 0, status);
3821 }
3822
3823 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3824 {
3825     FloatParts64 p;
3826
3827     parts_uint_to_float(&p, a, scale, status);
3828     return bfloat16_round_pack_canonical(&p, status);
3829 }
3830
3831 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3832 {
3833     return uint64_to_bfloat16_scalbn(a, scale, status);
3834 }
3835
3836 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3837 {
3838     return uint64_to_bfloat16_scalbn(a, scale, status);
3839 }
3840
3841 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3842 {
3843     return uint64_to_bfloat16_scalbn(a, 0, status);
3844 }
3845
3846 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3847 {
3848     return uint64_to_bfloat16_scalbn(a, 0, status);
3849 }
3850
3851 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3852 {
3853     return uint64_to_bfloat16_scalbn(a, 0, status);
3854 }
3855
3856 float128 uint64_to_float128(uint64_t a, float_status *status)
3857 {
3858     FloatParts128 p;
3859
3860     parts_uint_to_float(&p, a, 0, status);
3861     return float128_round_pack_canonical(&p, status);
3862 }
3863
3864 /*
3865  * Minimum and maximum
3866  */
3867
3868 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3869 {
3870     FloatParts64 pa, pb, *pr;
3871
3872     float16_unpack_canonical(&pa, a, s);
3873     float16_unpack_canonical(&pb, b, s);
3874     pr = parts_minmax(&pa, &pb, s, flags);
3875
3876     return float16_round_pack_canonical(pr, s);
3877 }
3878
3879 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3880                                 float_status *s, int flags)
3881 {
3882     FloatParts64 pa, pb, *pr;
3883
3884     bfloat16_unpack_canonical(&pa, a, s);
3885     bfloat16_unpack_canonical(&pb, b, s);
3886     pr = parts_minmax(&pa, &pb, s, flags);
3887
3888     return bfloat16_round_pack_canonical(pr, s);
3889 }
3890
3891 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3892 {
3893     FloatParts64 pa, pb, *pr;
3894
3895     float32_unpack_canonical(&pa, a, s);
3896     float32_unpack_canonical(&pb, b, s);
3897     pr = parts_minmax(&pa, &pb, s, flags);
3898
3899     return float32_round_pack_canonical(pr, s);
3900 }
3901
3902 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3903 {
3904     FloatParts64 pa, pb, *pr;
3905
3906     float64_unpack_canonical(&pa, a, s);
3907     float64_unpack_canonical(&pb, b, s);
3908     pr = parts_minmax(&pa, &pb, s, flags);
3909
3910     return float64_round_pack_canonical(pr, s);
3911 }
3912
3913 static float128 float128_minmax(float128 a, float128 b,
3914                                 float_status *s, int flags)
3915 {
3916     FloatParts128 pa, pb, *pr;
3917
3918     float128_unpack_canonical(&pa, a, s);
3919     float128_unpack_canonical(&pb, b, s);
3920     pr = parts_minmax(&pa, &pb, s, flags);
3921
3922     return float128_round_pack_canonical(pr, s);
3923 }
3924
3925 #define MINMAX_1(type, name, flags) \
3926     type type##_##name(type a, type b, float_status *s) \
3927     { return type##_minmax(a, b, s, flags); }
3928
3929 #define MINMAX_2(type) \
3930     MINMAX_1(type, max, 0)                                      \
3931     MINMAX_1(type, maxnum, minmax_isnum)                        \
3932     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
3933     MINMAX_1(type, min, minmax_ismin)                           \
3934     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
3935     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3936
3937 MINMAX_2(float16)
3938 MINMAX_2(bfloat16)
3939 MINMAX_2(float32)
3940 MINMAX_2(float64)
3941 MINMAX_2(float128)
3942
3943 #undef MINMAX_1
3944 #undef MINMAX_2
3945
3946 /*
3947  * Floating point compare
3948  */
3949
3950 static FloatRelation QEMU_FLATTEN
3951 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3952 {
3953     FloatParts64 pa, pb;
3954
3955     float16_unpack_canonical(&pa, a, s);
3956     float16_unpack_canonical(&pb, b, s);
3957     return parts_compare(&pa, &pb, s, is_quiet);
3958 }
3959
3960 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3961 {
3962     return float16_do_compare(a, b, s, false);
3963 }
3964
3965 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3966 {
3967     return float16_do_compare(a, b, s, true);
3968 }
3969
3970 static FloatRelation QEMU_SOFTFLOAT_ATTR
3971 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3972 {
3973     FloatParts64 pa, pb;
3974
3975     float32_unpack_canonical(&pa, a, s);
3976     float32_unpack_canonical(&pb, b, s);
3977     return parts_compare(&pa, &pb, s, is_quiet);
3978 }
3979
3980 static FloatRelation QEMU_FLATTEN
3981 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3982 {
3983     union_float32 ua, ub;
3984
3985     ua.s = xa;
3986     ub.s = xb;
3987
3988     if (QEMU_NO_HARDFLOAT) {
3989         goto soft;
3990     }
3991
3992     float32_input_flush2(&ua.s, &ub.s, s);
3993     if (isgreaterequal(ua.h, ub.h)) {
3994         if (isgreater(ua.h, ub.h)) {
3995             return float_relation_greater;
3996         }
3997         return float_relation_equal;
3998     }
3999     if (likely(isless(ua.h, ub.h))) {
4000         return float_relation_less;
4001     }
4002     /*
4003      * The only condition remaining is unordered.
4004      * Fall through to set flags.
4005      */
4006  soft:
4007     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4008 }
4009
4010 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4011 {
4012     return float32_hs_compare(a, b, s, false);
4013 }
4014
4015 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4016 {
4017     return float32_hs_compare(a, b, s, true);
4018 }
4019
4020 static FloatRelation QEMU_SOFTFLOAT_ATTR
4021 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4022 {
4023     FloatParts64 pa, pb;
4024
4025     float64_unpack_canonical(&pa, a, s);
4026     float64_unpack_canonical(&pb, b, s);
4027     return parts_compare(&pa, &pb, s, is_quiet);
4028 }
4029
4030 static FloatRelation QEMU_FLATTEN
4031 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4032 {
4033     union_float64 ua, ub;
4034
4035     ua.s = xa;
4036     ub.s = xb;
4037
4038     if (QEMU_NO_HARDFLOAT) {
4039         goto soft;
4040     }
4041
4042     float64_input_flush2(&ua.s, &ub.s, s);
4043     if (isgreaterequal(ua.h, ub.h)) {
4044         if (isgreater(ua.h, ub.h)) {
4045             return float_relation_greater;
4046         }
4047         return float_relation_equal;
4048     }
4049     if (likely(isless(ua.h, ub.h))) {
4050         return float_relation_less;
4051     }
4052     /*
4053      * The only condition remaining is unordered.
4054      * Fall through to set flags.
4055      */
4056  soft:
4057     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4058 }
4059
4060 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4061 {
4062     return float64_hs_compare(a, b, s, false);
4063 }
4064
4065 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4066 {
4067     return float64_hs_compare(a, b, s, true);
4068 }
4069
4070 static FloatRelation QEMU_FLATTEN
4071 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4072 {
4073     FloatParts64 pa, pb;
4074
4075     bfloat16_unpack_canonical(&pa, a, s);
4076     bfloat16_unpack_canonical(&pb, b, s);
4077     return parts_compare(&pa, &pb, s, is_quiet);
4078 }
4079
4080 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4081 {
4082     return bfloat16_do_compare(a, b, s, false);
4083 }
4084
4085 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4086 {
4087     return bfloat16_do_compare(a, b, s, true);
4088 }
4089
4090 static FloatRelation QEMU_FLATTEN
4091 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4092 {
4093     FloatParts128 pa, pb;
4094
4095     float128_unpack_canonical(&pa, a, s);
4096     float128_unpack_canonical(&pb, b, s);
4097     return parts_compare(&pa, &pb, s, is_quiet);
4098 }
4099
4100 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4101 {
4102     return float128_do_compare(a, b, s, false);
4103 }
4104
4105 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4106 {
4107     return float128_do_compare(a, b, s, true);
4108 }
4109
4110 static FloatRelation QEMU_FLATTEN
4111 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4112 {
4113     FloatParts128 pa, pb;
4114
4115     if (!floatx80_unpack_canonical(&pa, a, s) ||
4116         !floatx80_unpack_canonical(&pb, b, s)) {
4117         return float_relation_unordered;
4118     }
4119     return parts_compare(&pa, &pb, s, is_quiet);
4120 }
4121
4122 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4123 {
4124     return floatx80_do_compare(a, b, s, false);
4125 }
4126
4127 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4128 {
4129     return floatx80_do_compare(a, b, s, true);
4130 }
4131
4132 /*
4133  * Scale by 2**N
4134  */
4135
4136 float16 float16_scalbn(float16 a, int n, float_status *status)
4137 {
4138     FloatParts64 p;
4139
4140     float16_unpack_canonical(&p, a, status);
4141     parts_scalbn(&p, n, status);
4142     return float16_round_pack_canonical(&p, status);
4143 }
4144
4145 float32 float32_scalbn(float32 a, int n, float_status *status)
4146 {
4147     FloatParts64 p;
4148
4149     float32_unpack_canonical(&p, a, status);
4150     parts_scalbn(&p, n, status);
4151     return float32_round_pack_canonical(&p, status);
4152 }
4153
4154 float64 float64_scalbn(float64 a, int n, float_status *status)
4155 {
4156     FloatParts64 p;
4157
4158     float64_unpack_canonical(&p, a, status);
4159     parts_scalbn(&p, n, status);
4160     return float64_round_pack_canonical(&p, status);
4161 }
4162
4163 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4164 {
4165     FloatParts64 p;
4166
4167     bfloat16_unpack_canonical(&p, a, status);
4168     parts_scalbn(&p, n, status);
4169     return bfloat16_round_pack_canonical(&p, status);
4170 }
4171
4172 float128 float128_scalbn(float128 a, int n, float_status *status)
4173 {
4174     FloatParts128 p;
4175
4176     float128_unpack_canonical(&p, a, status);
4177     parts_scalbn(&p, n, status);
4178     return float128_round_pack_canonical(&p, status);
4179 }
4180
4181 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4182 {
4183     FloatParts128 p;
4184
4185     if (!floatx80_unpack_canonical(&p, a, status)) {
4186         return floatx80_default_nan(status);
4187     }
4188     parts_scalbn(&p, n, status);
4189     return floatx80_round_pack_canonical(&p, status);
4190 }
4191
4192 /*
4193  * Square Root
4194  */
4195
4196 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4197 {
4198     FloatParts64 p;
4199
4200     float16_unpack_canonical(&p, a, status);
4201     parts_sqrt(&p, status, &float16_params);
4202     return float16_round_pack_canonical(&p, status);
4203 }
4204
4205 static float32 QEMU_SOFTFLOAT_ATTR
4206 soft_f32_sqrt(float32 a, float_status *status)
4207 {
4208     FloatParts64 p;
4209
4210     float32_unpack_canonical(&p, a, status);
4211     parts_sqrt(&p, status, &float32_params);
4212     return float32_round_pack_canonical(&p, status);
4213 }
4214
4215 static float64 QEMU_SOFTFLOAT_ATTR
4216 soft_f64_sqrt(float64 a, float_status *status)
4217 {
4218     FloatParts64 p;
4219
4220     float64_unpack_canonical(&p, a, status);
4221     parts_sqrt(&p, status, &float64_params);
4222     return float64_round_pack_canonical(&p, status);
4223 }
4224
4225 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4226 {
4227     union_float32 ua, ur;
4228
4229     ua.s = xa;
4230     if (unlikely(!can_use_fpu(s))) {
4231         goto soft;
4232     }
4233
4234     float32_input_flush1(&ua.s, s);
4235     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4236         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4237                        fpclassify(ua.h) == FP_ZERO) ||
4238                      signbit(ua.h))) {
4239             goto soft;
4240         }
4241     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4242                         float32_is_neg(ua.s))) {
4243         goto soft;
4244     }
4245     ur.h = sqrtf(ua.h);
4246     return ur.s;
4247
4248  soft:
4249     return soft_f32_sqrt(ua.s, s);
4250 }
4251
4252 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4253 {
4254     union_float64 ua, ur;
4255
4256     ua.s = xa;
4257     if (unlikely(!can_use_fpu(s))) {
4258         goto soft;
4259     }
4260
4261     float64_input_flush1(&ua.s, s);
4262     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4263         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4264                        fpclassify(ua.h) == FP_ZERO) ||
4265                      signbit(ua.h))) {
4266             goto soft;
4267         }
4268     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4269                         float64_is_neg(ua.s))) {
4270         goto soft;
4271     }
4272     ur.h = sqrt(ua.h);
4273     return ur.s;
4274
4275  soft:
4276     return soft_f64_sqrt(ua.s, s);
4277 }
4278
4279 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4280 {
4281     FloatParts64 p;
4282
4283     bfloat16_unpack_canonical(&p, a, status);
4284     parts_sqrt(&p, status, &bfloat16_params);
4285     return bfloat16_round_pack_canonical(&p, status);
4286 }
4287
4288 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4289 {
4290     FloatParts128 p;
4291
4292     float128_unpack_canonical(&p, a, status);
4293     parts_sqrt(&p, status, &float128_params);
4294     return float128_round_pack_canonical(&p, status);
4295 }
4296
4297 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4298 {
4299     FloatParts128 p;
4300
4301     if (!floatx80_unpack_canonical(&p, a, s)) {
4302         return floatx80_default_nan(s);
4303     }
4304     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4305     return floatx80_round_pack_canonical(&p, s);
4306 }
4307
4308 /*
4309  * log2
4310  */
4311 float32 float32_log2(float32 a, float_status *status)
4312 {
4313     FloatParts64 p;
4314
4315     float32_unpack_canonical(&p, a, status);
4316     parts_log2(&p, status, &float32_params);
4317     return float32_round_pack_canonical(&p, status);
4318 }
4319
4320 float64 float64_log2(float64 a, float_status *status)
4321 {
4322     FloatParts64 p;
4323
4324     float64_unpack_canonical(&p, a, status);
4325     parts_log2(&p, status, &float64_params);
4326     return float64_round_pack_canonical(&p, status);
4327 }
4328
4329 /*----------------------------------------------------------------------------
4330 | The pattern for a default generated NaN.
4331 *----------------------------------------------------------------------------*/
4332
4333 float16 float16_default_nan(float_status *status)
4334 {
4335     FloatParts64 p;
4336
4337     parts_default_nan(&p, status);
4338     p.frac >>= float16_params.frac_shift;
4339     return float16_pack_raw(&p);
4340 }
4341
4342 float32 float32_default_nan(float_status *status)
4343 {
4344     FloatParts64 p;
4345
4346     parts_default_nan(&p, status);
4347     p.frac >>= float32_params.frac_shift;
4348     return float32_pack_raw(&p);
4349 }
4350
4351 float64 float64_default_nan(float_status *status)
4352 {
4353     FloatParts64 p;
4354
4355     parts_default_nan(&p, status);
4356     p.frac >>= float64_params.frac_shift;
4357     return float64_pack_raw(&p);
4358 }
4359
4360 float128 float128_default_nan(float_status *status)
4361 {
4362     FloatParts128 p;
4363
4364     parts_default_nan(&p, status);
4365     frac_shr(&p, float128_params.frac_shift);
4366     return float128_pack_raw(&p);
4367 }
4368
4369 bfloat16 bfloat16_default_nan(float_status *status)
4370 {
4371     FloatParts64 p;
4372
4373     parts_default_nan(&p, status);
4374     p.frac >>= bfloat16_params.frac_shift;
4375     return bfloat16_pack_raw(&p);
4376 }
4377
4378 /*----------------------------------------------------------------------------
4379 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4380 *----------------------------------------------------------------------------*/
4381
4382 float16 float16_silence_nan(float16 a, float_status *status)
4383 {
4384     FloatParts64 p;
4385
4386     float16_unpack_raw(&p, a);
4387     p.frac <<= float16_params.frac_shift;
4388     parts_silence_nan(&p, status);
4389     p.frac >>= float16_params.frac_shift;
4390     return float16_pack_raw(&p);
4391 }
4392
4393 float32 float32_silence_nan(float32 a, float_status *status)
4394 {
4395     FloatParts64 p;
4396
4397     float32_unpack_raw(&p, a);
4398     p.frac <<= float32_params.frac_shift;
4399     parts_silence_nan(&p, status);
4400     p.frac >>= float32_params.frac_shift;
4401     return float32_pack_raw(&p);
4402 }
4403
4404 float64 float64_silence_nan(float64 a, float_status *status)
4405 {
4406     FloatParts64 p;
4407
4408     float64_unpack_raw(&p, a);
4409     p.frac <<= float64_params.frac_shift;
4410     parts_silence_nan(&p, status);
4411     p.frac >>= float64_params.frac_shift;
4412     return float64_pack_raw(&p);
4413 }
4414
4415 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4416 {
4417     FloatParts64 p;
4418
4419     bfloat16_unpack_raw(&p, a);
4420     p.frac <<= bfloat16_params.frac_shift;
4421     parts_silence_nan(&p, status);
4422     p.frac >>= bfloat16_params.frac_shift;
4423     return bfloat16_pack_raw(&p);
4424 }
4425
4426 float128 float128_silence_nan(float128 a, float_status *status)
4427 {
4428     FloatParts128 p;
4429
4430     float128_unpack_raw(&p, a);
4431     frac_shl(&p, float128_params.frac_shift);
4432     parts_silence_nan(&p, status);
4433     frac_shr(&p, float128_params.frac_shift);
4434     return float128_pack_raw(&p);
4435 }
4436
4437 /*----------------------------------------------------------------------------
4438 | If `a' is denormal and we are in flush-to-zero mode then set the
4439 | input-denormal exception and return zero. Otherwise just return the value.
4440 *----------------------------------------------------------------------------*/
4441
4442 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4443 {
4444     if (p.exp == 0 && p.frac != 0) {
4445         float_raise(float_flag_input_denormal, status);
4446         return true;
4447     }
4448
4449     return false;
4450 }
4451
4452 float16 float16_squash_input_denormal(float16 a, float_status *status)
4453 {
4454     if (status->flush_inputs_to_zero) {
4455         FloatParts64 p;
4456
4457         float16_unpack_raw(&p, a);
4458         if (parts_squash_denormal(p, status)) {
4459             return float16_set_sign(float16_zero, p.sign);
4460         }
4461     }
4462     return a;
4463 }
4464
4465 float32 float32_squash_input_denormal(float32 a, float_status *status)
4466 {
4467     if (status->flush_inputs_to_zero) {
4468         FloatParts64 p;
4469
4470         float32_unpack_raw(&p, a);
4471         if (parts_squash_denormal(p, status)) {
4472             return float32_set_sign(float32_zero, p.sign);
4473         }
4474     }
4475     return a;
4476 }
4477
4478 float64 float64_squash_input_denormal(float64 a, float_status *status)
4479 {
4480     if (status->flush_inputs_to_zero) {
4481         FloatParts64 p;
4482
4483         float64_unpack_raw(&p, a);
4484         if (parts_squash_denormal(p, status)) {
4485             return float64_set_sign(float64_zero, p.sign);
4486         }
4487     }
4488     return a;
4489 }
4490
4491 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4492 {
4493     if (status->flush_inputs_to_zero) {
4494         FloatParts64 p;
4495
4496         bfloat16_unpack_raw(&p, a);
4497         if (parts_squash_denormal(p, status)) {
4498             return bfloat16_set_sign(bfloat16_zero, p.sign);
4499         }
4500     }
4501     return a;
4502 }
4503
4504 /*----------------------------------------------------------------------------
4505 | Normalizes the subnormal extended double-precision floating-point value
4506 | represented by the denormalized significand `aSig'.  The normalized exponent
4507 | and significand are stored at the locations pointed to by `zExpPtr' and
4508 | `zSigPtr', respectively.
4509 *----------------------------------------------------------------------------*/
4510
4511 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4512                                 uint64_t *zSigPtr)
4513 {
4514     int8_t shiftCount;
4515
4516     shiftCount = clz64(aSig);
4517     *zSigPtr = aSig<<shiftCount;
4518     *zExpPtr = 1 - shiftCount;
4519 }
4520
4521 /*----------------------------------------------------------------------------
4522 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4523 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4524 | and returns the proper extended double-precision floating-point value
4525 | corresponding to the abstract input.  Ordinarily, the abstract value is
4526 | rounded and packed into the extended double-precision format, with the
4527 | inexact exception raised if the abstract input cannot be represented
4528 | exactly.  However, if the abstract value is too large, the overflow and
4529 | inexact exceptions are raised and an infinity or maximal finite value is
4530 | returned.  If the abstract value is too small, the input value is rounded to
4531 | a subnormal number, and the underflow and inexact exceptions are raised if
4532 | the abstract input cannot be represented exactly as a subnormal extended
4533 | double-precision floating-point number.
4534 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4535 | the result is rounded to the same number of bits as single or double
4536 | precision, respectively.  Otherwise, the result is rounded to the full
4537 | precision of the extended double-precision format.
4538 |     The input significand must be normalized or smaller.  If the input
4539 | significand is not normalized, `zExp' must be 0; in that case, the result
4540 | returned is a subnormal number, and it must not require rounding.  The
4541 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4542 | Floating-Point Arithmetic.
4543 *----------------------------------------------------------------------------*/
4544
4545 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4546                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4547                               float_status *status)
4548 {
4549     FloatRoundMode roundingMode;
4550     bool roundNearestEven, increment, isTiny;
4551     int64_t roundIncrement, roundMask, roundBits;
4552
4553     roundingMode = status->float_rounding_mode;
4554     roundNearestEven = ( roundingMode == float_round_nearest_even );
4555     switch (roundingPrecision) {
4556     case floatx80_precision_x:
4557         goto precision80;
4558     case floatx80_precision_d:
4559         roundIncrement = UINT64_C(0x0000000000000400);
4560         roundMask = UINT64_C(0x00000000000007FF);
4561         break;
4562     case floatx80_precision_s:
4563         roundIncrement = UINT64_C(0x0000008000000000);
4564         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4565         break;
4566     default:
4567         g_assert_not_reached();
4568     }
4569     zSig0 |= ( zSig1 != 0 );
4570     switch (roundingMode) {
4571     case float_round_nearest_even:
4572     case float_round_ties_away:
4573         break;
4574     case float_round_to_zero:
4575         roundIncrement = 0;
4576         break;
4577     case float_round_up:
4578         roundIncrement = zSign ? 0 : roundMask;
4579         break;
4580     case float_round_down:
4581         roundIncrement = zSign ? roundMask : 0;
4582         break;
4583     default:
4584         abort();
4585     }
4586     roundBits = zSig0 & roundMask;
4587     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4588         if (    ( 0x7FFE < zExp )
4589              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4590            ) {
4591             goto overflow;
4592         }
4593         if ( zExp <= 0 ) {
4594             if (status->flush_to_zero) {
4595                 float_raise(float_flag_output_denormal, status);
4596                 return packFloatx80(zSign, 0, 0);
4597             }
4598             isTiny = status->tininess_before_rounding
4599                   || (zExp < 0 )
4600                   || (zSig0 <= zSig0 + roundIncrement);
4601             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4602             zExp = 0;
4603             roundBits = zSig0 & roundMask;
4604             if (isTiny && roundBits) {
4605                 float_raise(float_flag_underflow, status);
4606             }
4607             if (roundBits) {
4608                 float_raise(float_flag_inexact, status);
4609             }
4610             zSig0 += roundIncrement;
4611             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4612             roundIncrement = roundMask + 1;
4613             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4614                 roundMask |= roundIncrement;
4615             }
4616             zSig0 &= ~ roundMask;
4617             return packFloatx80( zSign, zExp, zSig0 );
4618         }
4619     }
4620     if (roundBits) {
4621         float_raise(float_flag_inexact, status);
4622     }
4623     zSig0 += roundIncrement;
4624     if ( zSig0 < roundIncrement ) {
4625         ++zExp;
4626         zSig0 = UINT64_C(0x8000000000000000);
4627     }
4628     roundIncrement = roundMask + 1;
4629     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4630         roundMask |= roundIncrement;
4631     }
4632     zSig0 &= ~ roundMask;
4633     if ( zSig0 == 0 ) zExp = 0;
4634     return packFloatx80( zSign, zExp, zSig0 );
4635  precision80:
4636     switch (roundingMode) {
4637     case float_round_nearest_even:
4638     case float_round_ties_away:
4639         increment = ((int64_t)zSig1 < 0);
4640         break;
4641     case float_round_to_zero:
4642         increment = 0;
4643         break;
4644     case float_round_up:
4645         increment = !zSign && zSig1;
4646         break;
4647     case float_round_down:
4648         increment = zSign && zSig1;
4649         break;
4650     default:
4651         abort();
4652     }
4653     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4654         if (    ( 0x7FFE < zExp )
4655              || (    ( zExp == 0x7FFE )
4656                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4657                   && increment
4658                 )
4659            ) {
4660             roundMask = 0;
4661  overflow:
4662             float_raise(float_flag_overflow | float_flag_inexact, status);
4663             if (    ( roundingMode == float_round_to_zero )
4664                  || ( zSign && ( roundingMode == float_round_up ) )
4665                  || ( ! zSign && ( roundingMode == float_round_down ) )
4666                ) {
4667                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4668             }
4669             return packFloatx80(zSign,
4670                                 floatx80_infinity_high,
4671                                 floatx80_infinity_low);
4672         }
4673         if ( zExp <= 0 ) {
4674             isTiny = status->tininess_before_rounding
4675                   || (zExp < 0)
4676                   || !increment
4677                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4678             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4679             zExp = 0;
4680             if (isTiny && zSig1) {
4681                 float_raise(float_flag_underflow, status);
4682             }
4683             if (zSig1) {
4684                 float_raise(float_flag_inexact, status);
4685             }
4686             switch (roundingMode) {
4687             case float_round_nearest_even:
4688             case float_round_ties_away:
4689                 increment = ((int64_t)zSig1 < 0);
4690                 break;
4691             case float_round_to_zero:
4692                 increment = 0;
4693                 break;
4694             case float_round_up:
4695                 increment = !zSign && zSig1;
4696                 break;
4697             case float_round_down:
4698                 increment = zSign && zSig1;
4699                 break;
4700             default:
4701                 abort();
4702             }
4703             if ( increment ) {
4704                 ++zSig0;
4705                 if (!(zSig1 << 1) && roundNearestEven) {
4706                     zSig0 &= ~1;
4707                 }
4708                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4709             }
4710             return packFloatx80( zSign, zExp, zSig0 );
4711         }
4712     }
4713     if (zSig1) {
4714         float_raise(float_flag_inexact, status);
4715     }
4716     if ( increment ) {
4717         ++zSig0;
4718         if ( zSig0 == 0 ) {
4719             ++zExp;
4720             zSig0 = UINT64_C(0x8000000000000000);
4721         }
4722         else {
4723             if (!(zSig1 << 1) && roundNearestEven) {
4724                 zSig0 &= ~1;
4725             }
4726         }
4727     }
4728     else {
4729         if ( zSig0 == 0 ) zExp = 0;
4730     }
4731     return packFloatx80( zSign, zExp, zSig0 );
4732
4733 }
4734
4735 /*----------------------------------------------------------------------------
4736 | Takes an abstract floating-point value having sign `zSign', exponent
4737 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4738 | and returns the proper extended double-precision floating-point value
4739 | corresponding to the abstract input.  This routine is just like
4740 | `roundAndPackFloatx80' except that the input significand does not have to be
4741 | normalized.
4742 *----------------------------------------------------------------------------*/
4743
4744 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4745                                        bool zSign, int32_t zExp,
4746                                        uint64_t zSig0, uint64_t zSig1,
4747                                        float_status *status)
4748 {
4749     int8_t shiftCount;
4750
4751     if ( zSig0 == 0 ) {
4752         zSig0 = zSig1;
4753         zSig1 = 0;
4754         zExp -= 64;
4755     }
4756     shiftCount = clz64(zSig0);
4757     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4758     zExp -= shiftCount;
4759     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4760                                 zSig0, zSig1, status);
4761
4762 }
4763
4764 /*----------------------------------------------------------------------------
4765 | Returns the binary exponential of the single-precision floating-point value
4766 | `a'. The operation is performed according to the IEC/IEEE Standard for
4767 | Binary Floating-Point Arithmetic.
4768 |
4769 | Uses the following identities:
4770 |
4771 | 1. -------------------------------------------------------------------------
4772 |      x    x*ln(2)
4773 |     2  = e
4774 |
4775 | 2. -------------------------------------------------------------------------
4776 |                      2     3     4     5           n
4777 |      x        x     x     x     x     x           x
4778 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4779 |               1!    2!    3!    4!    5!          n!
4780 *----------------------------------------------------------------------------*/
4781
4782 static const float64 float32_exp2_coefficients[15] =
4783 {
4784     const_float64( 0x3ff0000000000000ll ), /*  1 */
4785     const_float64( 0x3fe0000000000000ll ), /*  2 */
4786     const_float64( 0x3fc5555555555555ll ), /*  3 */
4787     const_float64( 0x3fa5555555555555ll ), /*  4 */
4788     const_float64( 0x3f81111111111111ll ), /*  5 */
4789     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
4790     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
4791     const_float64( 0x3efa01a01a01a01all ), /*  8 */
4792     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
4793     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4794     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4795     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4796     const_float64( 0x3de6124613a86d09ll ), /* 13 */
4797     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4798     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
4799 };
4800
4801 float32 float32_exp2(float32 a, float_status *status)
4802 {
4803     FloatParts64 xp, xnp, tp, rp;
4804     int i;
4805
4806     float32_unpack_canonical(&xp, a, status);
4807     if (unlikely(xp.cls != float_class_normal)) {
4808         switch (xp.cls) {
4809         case float_class_snan:
4810         case float_class_qnan:
4811             parts_return_nan(&xp, status);
4812             return float32_round_pack_canonical(&xp, status);
4813         case float_class_inf:
4814             return xp.sign ? float32_zero : a;
4815         case float_class_zero:
4816             return float32_one;
4817         default:
4818             break;
4819         }
4820         g_assert_not_reached();
4821     }
4822
4823     float_raise(float_flag_inexact, status);
4824
4825     float64_unpack_canonical(&tp, float64_ln2, status);
4826     xp = *parts_mul(&xp, &tp, status);
4827     xnp = xp;
4828
4829     float64_unpack_canonical(&rp, float64_one, status);
4830     for (i = 0 ; i < 15 ; i++) {
4831         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
4832         rp = *parts_muladd(&tp, &xp, &rp, 0, status);
4833         xnp = *parts_mul(&xnp, &xp, status);
4834     }
4835
4836     return float32_round_pack_canonical(&rp, status);
4837 }
4838
4839 /*----------------------------------------------------------------------------
4840 | Rounds the extended double-precision floating-point value `a'
4841 | to the precision provided by floatx80_rounding_precision and returns the
4842 | result as an extended double-precision floating-point value.
4843 | The operation is performed according to the IEC/IEEE Standard for Binary
4844 | Floating-Point Arithmetic.
4845 *----------------------------------------------------------------------------*/
4846
4847 floatx80 floatx80_round(floatx80 a, float_status *status)
4848 {
4849     FloatParts128 p;
4850
4851     if (!floatx80_unpack_canonical(&p, a, status)) {
4852         return floatx80_default_nan(status);
4853     }
4854     return floatx80_round_pack_canonical(&p, status);
4855 }
4856
4857 static void __attribute__((constructor)) softfloat_init(void)
4858 {
4859     union_float64 ua, ub, uc, ur;
4860
4861     if (QEMU_NO_HARDFLOAT) {
4862         return;
4863     }
4864     /*
4865      * Test that the host's FMA is not obviously broken. For example,
4866      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4867      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4868      */
4869     ua.s = 0x0020000000000001ULL;
4870     ub.s = 0x3ca0000000000000ULL;
4871     uc.s = 0x0020000000000000ULL;
4872     ur.h = fma(ua.h, ub.h, uc.h);
4873     if (ur.s != 0x0020000000000001ULL) {
4874         force_soft_fma = true;
4875     }
4876 }