fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*----------------------------------------------------------------------------
 405 | Returns the fraction bits of the single-precision floating-point value `a'.
 406 *----------------------------------------------------------------------------*/
 407
 408 static inline uint32_t extractFloat32Frac(float32 a)
 409 {
 410     return float32_val(a) & 0x007FFFFF;
 411 }
 412
 413 /*----------------------------------------------------------------------------
 414 | Returns the exponent bits of the single-precision floating-point value `a'.
 415 *----------------------------------------------------------------------------*/
 416
 417 static inline int extractFloat32Exp(float32 a)
 418 {
 419     return (float32_val(a) >> 23) & 0xFF;
 420 }
 421
 422 /*----------------------------------------------------------------------------
 423 | Returns the sign bit of the single-precision floating-point value `a'.
 424 *----------------------------------------------------------------------------*/
 425
 426 static inline bool extractFloat32Sign(float32 a)
 427 {
 428     return float32_val(a) >> 31;
 429 }
 430
 431 /*----------------------------------------------------------------------------
 432 | Returns the fraction bits of the double-precision floating-point value `a'.
 433 *----------------------------------------------------------------------------*/
 434
 435 static inline uint64_t extractFloat64Frac(float64 a)
 436 {
 437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
 438 }
 439
 440 /*----------------------------------------------------------------------------
 441 | Returns the exponent bits of the double-precision floating-point value `a'.
 442 *----------------------------------------------------------------------------*/
 443
 444 static inline int extractFloat64Exp(float64 a)
 445 {
 446     return (float64_val(a) >> 52) & 0x7FF;
 447 }
 448
 449 /*----------------------------------------------------------------------------
 450 | Returns the sign bit of the double-precision floating-point value `a'.
 451 *----------------------------------------------------------------------------*/
 452
 453 static inline bool extractFloat64Sign(float64 a)
 454 {
 455     return float64_val(a) >> 63;
 456 }
 457
 458 /*
 459  * Classify a floating point number. Everything above float_class_qnan
 460  * is a NaN so cls >= float_class_qnan is any NaN.
 461  */
 462
 463 typedef enum __attribute__ ((__packed__)) {
 464     float_class_unclassified,
 465     float_class_zero,
 466     float_class_normal,
 467     float_class_inf,
 468     float_class_qnan,  /* all NaNs from here */
 469     float_class_snan,
 470 } FloatClass;
 471
 472 #define float_cmask(bit)  (1u << (bit))
 473
 474 enum {
 475     float_cmask_zero    = float_cmask(float_class_zero),
 476     float_cmask_normal  = float_cmask(float_class_normal),
 477     float_cmask_inf     = float_cmask(float_class_inf),
 478     float_cmask_qnan    = float_cmask(float_class_qnan),
 479     float_cmask_snan    = float_cmask(float_class_snan),
 480
 481     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 482     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 483 };
 484
 485
 486 /* Simple helpers for checking if, or what kind of, NaN we have */
 487 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 488 {
 489     return unlikely(c >= float_class_qnan);
 490 }
 491
 492 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 493 {
 494     return c == float_class_snan;
 495 }
 496
 497 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 498 {
 499     return c == float_class_qnan;
 500 }
 501
 502 /*
 503  * Structure holding all of the decomposed parts of a float.
 504  * The exponent is unbiased and the fraction is normalized.
 505  *
 506  * The fraction words are stored in big-endian word ordering,
 507  * so that truncation from a larger format to a smaller format
 508  * can be done simply by ignoring subsequent elements.
 509  */
 510
 511 typedef struct {
 512     FloatClass cls;
 513     bool sign;
 514     int32_t exp;
 515     union {
 516         /* Routines that know the structure may reference the singular name. */
 517         uint64_t frac;
 518         /*
 519          * Routines expanded with multiple structures reference "hi" and "lo"
 520          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 521          * both the same word and aliased here.
 522          */
 523         uint64_t frac_hi;
 524         uint64_t frac_lo;
 525     };
 526 } FloatParts64;
 527
 528 typedef struct {
 529     FloatClass cls;
 530     bool sign;
 531     int32_t exp;
 532     uint64_t frac_hi;
 533     uint64_t frac_lo;
 534 } FloatParts128;
 535
 536 typedef struct {
 537     FloatClass cls;
 538     bool sign;
 539     int32_t exp;
 540     uint64_t frac_hi;
 541     uint64_t frac_hm;  /* high-middle */
 542     uint64_t frac_lm;  /* low-middle */
 543     uint64_t frac_lo;
 544 } FloatParts256;
 545
 546 /* These apply to the most significant word of each FloatPartsN. */
 547 #define DECOMPOSED_BINARY_POINT    63
 548 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 549
 550 /* Structure holding all of the relevant parameters for a format.
 551  *   exp_size: the size of the exponent field
 552  *   exp_bias: the offset applied to the exponent field
 553  *   exp_max: the maximum normalised exponent
 554  *   frac_size: the size of the fraction field
 555  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 556  * The following are computed based the size of fraction
 557  *   frac_lsb: least significant bit of fraction
 558  *   frac_lsbm1: the bit below the least significant bit (for rounding)
 559  *   round_mask/roundeven_mask: masks used for rounding
 560  * The following optional modifiers are available:
 561  *   arm_althp: handle ARM Alternative Half Precision
 562  */
 563 typedef struct {
 564     int exp_size;
 565     int exp_bias;
 566     int exp_max;
 567     int frac_size;
 568     int frac_shift;
 569     uint64_t frac_lsb;
 570     uint64_t frac_lsbm1;
 571     uint64_t round_mask;
 572     uint64_t roundeven_mask;
 573     bool arm_althp;
 574 } FloatFmt;
 575
 576 /* Expand fields based on the size of exponent and fraction */
 577 #define FLOAT_PARAMS(E, F)                                           \
 578     .exp_size       = E,                                             \
 579     .exp_bias       = ((1 << E) - 1) >> 1,                           \
 580     .exp_max        = (1 << E) - 1,                                  \
 581     .frac_size      = F,                                             \
 582     .frac_shift     = (-F - 1) & 63,                                 \
 583     .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
 584     .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
 585     .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
 586     .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
 587
 588 static const FloatFmt float16_params = {
 589     FLOAT_PARAMS(5, 10)
 590 };
 591
 592 static const FloatFmt float16_params_ahp = {
 593     FLOAT_PARAMS(5, 10),
 594     .arm_althp = true
 595 };
 596
 597 static const FloatFmt bfloat16_params = {
 598     FLOAT_PARAMS(8, 7)
 599 };
 600
 601 static const FloatFmt float32_params = {
 602     FLOAT_PARAMS(8, 23)
 603 };
 604
 605 static const FloatFmt float64_params = {
 606     FLOAT_PARAMS(11, 52)
 607 };
 608
 609 static const FloatFmt float128_params = {
 610     FLOAT_PARAMS(15, 112)
 611 };
 612
 613 /* Unpack a float to parts, but do not canonicalize.  */
 614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 615 {
 616     const int f_size = fmt->frac_size;
 617     const int e_size = fmt->exp_size;
 618
 619     *r = (FloatParts64) {
 620         .cls = float_class_unclassified,
 621         .sign = extract64(raw, f_size + e_size, 1),
 622         .exp = extract64(raw, f_size, e_size),
 623         .frac = extract64(raw, 0, f_size)
 624     };
 625 }
 626
 627 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 628 {
 629     unpack_raw64(p, &float16_params, f);
 630 }
 631
 632 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 633 {
 634     unpack_raw64(p, &bfloat16_params, f);
 635 }
 636
 637 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 638 {
 639     unpack_raw64(p, &float32_params, f);
 640 }
 641
 642 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 643 {
 644     unpack_raw64(p, &float64_params, f);
 645 }
 646
 647 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 648 {
 649     const int f_size = float128_params.frac_size - 64;
 650     const int e_size = float128_params.exp_size;
 651
 652     *p = (FloatParts128) {
 653         .cls = float_class_unclassified,
 654         .sign = extract64(f.high, f_size + e_size, 1),
 655         .exp = extract64(f.high, f_size, e_size),
 656         .frac_hi = extract64(f.high, 0, f_size),
 657         .frac_lo = f.low,
 658     };
 659 }
 660
 661 /* Pack a float from parts, but do not canonicalize.  */
 662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 663 {
 664     const int f_size = fmt->frac_size;
 665     const int e_size = fmt->exp_size;
 666     uint64_t ret;
 667
 668     ret = (uint64_t)p->sign << (f_size + e_size);
 669     ret = deposit64(ret, f_size, e_size, p->exp);
 670     ret = deposit64(ret, 0, f_size, p->frac);
 671     return ret;
 672 }
 673
 674 static inline float16 float16_pack_raw(const FloatParts64 *p)
 675 {
 676     return make_float16(pack_raw64(p, &float16_params));
 677 }
 678
 679 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 680 {
 681     return pack_raw64(p, &bfloat16_params);
 682 }
 683
 684 static inline float32 float32_pack_raw(const FloatParts64 *p)
 685 {
 686     return make_float32(pack_raw64(p, &float32_params));
 687 }
 688
 689 static inline float64 float64_pack_raw(const FloatParts64 *p)
 690 {
 691     return make_float64(pack_raw64(p, &float64_params));
 692 }
 693
 694 static float128 float128_pack_raw(const FloatParts128 *p)
 695 {
 696     const int f_size = float128_params.frac_size - 64;
 697     const int e_size = float128_params.exp_size;
 698     uint64_t hi;
 699
 700     hi = (uint64_t)p->sign << (f_size + e_size);
 701     hi = deposit64(hi, f_size, e_size, p->exp);
 702     hi = deposit64(hi, 0, f_size, p->frac_hi);
 703     return make_float128(hi, p->frac_lo);
 704 }
 705
 706 /*----------------------------------------------------------------------------
 707 | Functions and definitions to determine:  (1) whether tininess for underflow
 708 | is detected before or after rounding by default, (2) what (if anything)
 709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 711 | are propagated from function inputs to output.  These details are target-
 712 | specific.
 713 *----------------------------------------------------------------------------*/
 714 #include "softfloat-specialize.c.inc"
 715
 716 #define PARTS_GENERIC_64_128(NAME, P) \
 717     QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 718
 719 #define PARTS_GENERIC_64_128_256(NAME, P) \
 720     QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
 721                  (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 722
 723 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 724 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 725
 726 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 727 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 728
 729 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 730
 731 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 732                                       float_status *s);
 733 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 734                                         float_status *s);
 735
 736 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 737
 738 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 739                                              FloatParts64 *c, float_status *s,
 740                                              int ab_mask, int abc_mask);
 741 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 742                                                FloatParts128 *b,
 743                                                FloatParts128 *c,
 744                                                float_status *s,
 745                                                int ab_mask, int abc_mask);
 746
 747 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 748     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 749
 750 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 751                                  const FloatFmt *fmt);
 752 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 753                                   const FloatFmt *fmt);
 754
 755 #define parts_canonicalize(A, S, F) \
 756     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 757
 758 static void parts64_uncanon(FloatParts64 *p, float_status *status,
 759                             const FloatFmt *fmt);
 760 static void parts128_uncanon(FloatParts128 *p, float_status *status,
 761                              const FloatFmt *fmt);
 762
 763 #define parts_uncanon(A, S, F) \
 764     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 765
 766 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 767 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 768 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 769
 770 #define parts_add_normal(A, B) \
 771     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 772
 773 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 774 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 775 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 776
 777 #define parts_sub_normal(A, B) \
 778     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 779
 780 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 781                                     float_status *s, bool subtract);
 782 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 783                                       float_status *s, bool subtract);
 784
 785 #define parts_addsub(A, B, S, Z) \
 786     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 787
 788 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 789                                  float_status *s);
 790 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 791                                    float_status *s);
 792
 793 #define parts_mul(A, B, S) \
 794     PARTS_GENERIC_64_128(mul, A)(A, B, S)
 795
 796 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 797                                     FloatParts64 *c, int flags,
 798                                     float_status *s);
 799 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 800                                       FloatParts128 *c, int flags,
 801                                       float_status *s);
 802
 803 #define parts_muladd(A, B, C, Z, S) \
 804     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 805
 806 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 807                                  float_status *s);
 808 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 809                                    float_status *s);
 810
 811 #define parts_div(A, B, S) \
 812     PARTS_GENERIC_64_128(div, A)(A, B, S)
 813
 814 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 815                                         int scale, int frac_size);
 816 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 817                                          int scale, int frac_size);
 818
 819 #define parts_round_to_int_normal(A, R, C, F) \
 820     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 821
 822 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 823                                  int scale, float_status *s,
 824                                  const FloatFmt *fmt);
 825 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 826                                   int scale, float_status *s,
 827                                   const FloatFmt *fmt);
 828
 829 #define parts_round_to_int(A, R, C, S, F) \
 830     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 831
 832 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 833                                      int scale, int64_t min, int64_t max,
 834                                      float_status *s);
 835 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 836                                      int scale, int64_t min, int64_t max,
 837                                      float_status *s);
 838
 839 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 840     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 841
 842 /*
 843  * Helper functions for softfloat-parts.c.inc, per-size operations.
 844  */
 845
 846 #define FRAC_GENERIC_64_128(NAME, P) \
 847     QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 848
 849 #define FRAC_GENERIC_64_128_256(NAME, P) \
 850     QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
 851                  (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 852
 853 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 854 {
 855     return uadd64_overflow(a->frac, b->frac, &r->frac);
 856 }
 857
 858 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 859 {
 860     bool c = 0;
 861     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 862     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 863     return c;
 864 }
 865
 866 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 867 {
 868     bool c = 0;
 869     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 870     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 871     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 872     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 873     return c;
 874 }
 875
 876 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 877
 878 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 879 {
 880     return uadd64_overflow(a->frac, c, &r->frac);
 881 }
 882
 883 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 884 {
 885     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 886     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 887 }
 888
 889 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 890
 891 static void frac64_allones(FloatParts64 *a)
 892 {
 893     a->frac = -1;
 894 }
 895
 896 static void frac128_allones(FloatParts128 *a)
 897 {
 898     a->frac_hi = a->frac_lo = -1;
 899 }
 900
 901 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 902
 903 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 904 {
 905     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 906 }
 907
 908 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 909 {
 910     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 911     if (ta == tb) {
 912         ta = a->frac_lo, tb = b->frac_lo;
 913         if (ta == tb) {
 914             return 0;
 915         }
 916     }
 917     return ta < tb ? -1 : 1;
 918 }
 919
 920 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 921
 922 static void frac64_clear(FloatParts64 *a)
 923 {
 924     a->frac = 0;
 925 }
 926
 927 static void frac128_clear(FloatParts128 *a)
 928 {
 929     a->frac_hi = a->frac_lo = 0;
 930 }
 931
 932 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 933
 934 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 935 {
 936     uint64_t n1, n0, r, q;
 937     bool ret;
 938
 939     /*
 940      * We want a 2*N / N-bit division to produce exactly an N-bit
 941      * result, so that we do not lose any precision and so that we
 942      * do not have to renormalize afterward.  If A.frac < B.frac,
 943      * then division would produce an (N-1)-bit result; shift A left
 944      * by one to produce the an N-bit result, and return true to
 945      * decrement the exponent to match.
 946      *
 947      * The udiv_qrnnd algorithm that we're using requires normalization,
 948      * i.e. the msb of the denominator must be set, which is already true.
 949      */
 950     ret = a->frac < b->frac;
 951     if (ret) {
 952         n0 = a->frac;
 953         n1 = 0;
 954     } else {
 955         n0 = a->frac >> 1;
 956         n1 = a->frac << 63;
 957     }
 958     q = udiv_qrnnd(&r, n0, n1, b->frac);
 959
 960     /* Set lsb if there is a remainder, to set inexact. */
 961     a->frac = q | (r != 0);
 962
 963     return ret;
 964 }
 965
 966 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
 967 {
 968     uint64_t q0, q1, a0, a1, b0, b1;
 969     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
 970     bool ret = false;
 971
 972     a0 = a->frac_hi, a1 = a->frac_lo;
 973     b0 = b->frac_hi, b1 = b->frac_lo;
 974
 975     ret = lt128(a0, a1, b0, b1);
 976     if (!ret) {
 977         a1 = shr_double(a0, a1, 1);
 978         a0 = a0 >> 1;
 979     }
 980
 981     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
 982     q0 = estimateDiv128To64(a0, a1, b0);
 983
 984     /*
 985      * Estimate is high because B1 was not included (unless B1 == 0).
 986      * Reduce quotient and increase remainder until remainder is non-negative.
 987      * This loop will execute 0 to 2 times.
 988      */
 989     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
 990     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
 991     while (r0 != 0) {
 992         q0--;
 993         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
 994     }
 995
 996     /* Repeat using the remainder, producing a second word of quotient. */
 997     q1 = estimateDiv128To64(r1, r2, b0);
 998     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
 999     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1000     while (r1 != 0) {
1001         q1--;
1002         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1003     }
1004
1005     /* Any remainder indicates inexact; set sticky bit. */
1006     q1 |= (r2 | r3) != 0;
1007
1008     a->frac_hi = q0;
1009     a->frac_lo = q1;
1010     return ret;
1011 }
1012
1013 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1014
1015 static bool frac64_eqz(FloatParts64 *a)
1016 {
1017     return a->frac == 0;
1018 }
1019
1020 static bool frac128_eqz(FloatParts128 *a)
1021 {
1022     return (a->frac_hi | a->frac_lo) == 0;
1023 }
1024
1025 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1026
1027 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1028 {
1029     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1030 }
1031
1032 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1033 {
1034     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1035                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1036 }
1037
1038 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1039
1040 static void frac64_neg(FloatParts64 *a)
1041 {
1042     a->frac = -a->frac;
1043 }
1044
1045 static void frac128_neg(FloatParts128 *a)
1046 {
1047     bool c = 0;
1048     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1049     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1050 }
1051
1052 static void frac256_neg(FloatParts256 *a)
1053 {
1054     bool c = 0;
1055     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1056     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1057     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1058     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1059 }
1060
1061 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1062
1063 static int frac64_normalize(FloatParts64 *a)
1064 {
1065     if (a->frac) {
1066         int shift = clz64(a->frac);
1067         a->frac <<= shift;
1068         return shift;
1069     }
1070     return 64;
1071 }
1072
1073 static int frac128_normalize(FloatParts128 *a)
1074 {
1075     if (a->frac_hi) {
1076         int shl = clz64(a->frac_hi);
1077         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1078         a->frac_lo <<= shl;
1079         return shl;
1080     } else if (a->frac_lo) {
1081         int shl = clz64(a->frac_lo);
1082         a->frac_hi = a->frac_lo << shl;
1083         a->frac_lo = 0;
1084         return shl + 64;
1085     }
1086     return 128;
1087 }
1088
1089 static int frac256_normalize(FloatParts256 *a)
1090 {
1091     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1092     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1093     int ret, shl;
1094
1095     if (likely(a0)) {
1096         shl = clz64(a0);
1097         if (shl == 0) {
1098             return 0;
1099         }
1100         ret = shl;
1101     } else {
1102         if (a1) {
1103             ret = 64;
1104             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1105         } else if (a2) {
1106             ret = 128;
1107             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1108         } else if (a3) {
1109             ret = 192;
1110             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1111         } else {
1112             ret = 256;
1113             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1114             goto done;
1115         }
1116         shl = clz64(a0);
1117         if (shl == 0) {
1118             goto done;
1119         }
1120         ret += shl;
1121     }
1122
1123     a0 = shl_double(a0, a1, shl);
1124     a1 = shl_double(a1, a2, shl);
1125     a2 = shl_double(a2, a3, shl);
1126     a3 <<= shl;
1127
1128  done:
1129     a->frac_hi = a0;
1130     a->frac_hm = a1;
1131     a->frac_lm = a2;
1132     a->frac_lo = a3;
1133     return ret;
1134 }
1135
1136 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1137
1138 static void frac64_shl(FloatParts64 *a, int c)
1139 {
1140     a->frac <<= c;
1141 }
1142
1143 static void frac128_shl(FloatParts128 *a, int c)
1144 {
1145     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1146
1147     if (c & 64) {
1148         a0 = a1, a1 = 0;
1149     }
1150
1151     c &= 63;
1152     if (c) {
1153         a0 = shl_double(a0, a1, c);
1154         a1 = a1 << c;
1155     }
1156
1157     a->frac_hi = a0;
1158     a->frac_lo = a1;
1159 }
1160
1161 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1162
1163 static void frac64_shr(FloatParts64 *a, int c)
1164 {
1165     a->frac >>= c;
1166 }
1167
1168 static void frac128_shr(FloatParts128 *a, int c)
1169 {
1170     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1171
1172     if (c & 64) {
1173         a1 = a0, a0 = 0;
1174     }
1175
1176     c &= 63;
1177     if (c) {
1178         a1 = shr_double(a0, a1, c);
1179         a0 = a0 >> c;
1180     }
1181
1182     a->frac_hi = a0;
1183     a->frac_lo = a1;
1184 }
1185
1186 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1187
1188 static void frac64_shrjam(FloatParts64 *a, int c)
1189 {
1190     uint64_t a0 = a->frac;
1191
1192     if (likely(c != 0)) {
1193         if (likely(c < 64)) {
1194             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1195         } else {
1196             a0 = a0 != 0;
1197         }
1198         a->frac = a0;
1199     }
1200 }
1201
1202 static void frac128_shrjam(FloatParts128 *a, int c)
1203 {
1204     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1205     uint64_t sticky = 0;
1206
1207     if (unlikely(c == 0)) {
1208         return;
1209     } else if (likely(c < 64)) {
1210         /* nothing */
1211     } else if (likely(c < 128)) {
1212         sticky = a1;
1213         a1 = a0;
1214         a0 = 0;
1215         c &= 63;
1216         if (c == 0) {
1217             goto done;
1218         }
1219     } else {
1220         sticky = a0 | a1;
1221         a0 = a1 = 0;
1222         goto done;
1223     }
1224
1225     sticky |= shr_double(a1, 0, c);
1226     a1 = shr_double(a0, a1, c);
1227     a0 = a0 >> c;
1228
1229  done:
1230     a->frac_lo = a1 | (sticky != 0);
1231     a->frac_hi = a0;
1232 }
1233
1234 static void frac256_shrjam(FloatParts256 *a, int c)
1235 {
1236     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1237     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1238     uint64_t sticky = 0;
1239
1240     if (unlikely(c == 0)) {
1241         return;
1242     } else if (likely(c < 64)) {
1243         /* nothing */
1244     } else if (likely(c < 256)) {
1245         if (unlikely(c & 128)) {
1246             sticky |= a2 | a3;
1247             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1248         }
1249         if (unlikely(c & 64)) {
1250             sticky |= a3;
1251             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1252         }
1253         c &= 63;
1254         if (c == 0) {
1255             goto done;
1256         }
1257     } else {
1258         sticky = a0 | a1 | a2 | a3;
1259         a0 = a1 = a2 = a3 = 0;
1260         goto done;
1261     }
1262
1263     sticky |= shr_double(a3, 0, c);
1264     a3 = shr_double(a2, a3, c);
1265     a2 = shr_double(a1, a2, c);
1266     a1 = shr_double(a0, a1, c);
1267     a0 = a0 >> c;
1268
1269  done:
1270     a->frac_lo = a3 | (sticky != 0);
1271     a->frac_lm = a2;
1272     a->frac_hm = a1;
1273     a->frac_hi = a0;
1274 }
1275
1276 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1277
1278 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1279 {
1280     return usub64_overflow(a->frac, b->frac, &r->frac);
1281 }
1282
1283 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1284 {
1285     bool c = 0;
1286     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1287     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1288     return c;
1289 }
1290
1291 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1292 {
1293     bool c = 0;
1294     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1295     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1296     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1297     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1298     return c;
1299 }
1300
1301 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1302
1303 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1304 {
1305     r->frac = a->frac_hi | (a->frac_lo != 0);
1306 }
1307
1308 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1309 {
1310     r->frac_hi = a->frac_hi;
1311     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1312 }
1313
1314 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1315
1316 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1317 {
1318     r->frac_hi = a->frac;
1319     r->frac_lo = 0;
1320 }
1321
1322 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1323 {
1324     r->frac_hi = a->frac_hi;
1325     r->frac_hm = a->frac_lo;
1326     r->frac_lm = 0;
1327     r->frac_lo = 0;
1328 }
1329
1330 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1331
1332 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1333 #define FloatPartsN    glue(FloatParts,N)
1334 #define FloatPartsW    glue(FloatParts,W)
1335
1336 #define N 64
1337 #define W 128
1338
1339 #include "softfloat-parts-addsub.c.inc"
1340 #include "softfloat-parts.c.inc"
1341
1342 #undef  N
1343 #undef  W
1344 #define N 128
1345 #define W 256
1346
1347 #include "softfloat-parts-addsub.c.inc"
1348 #include "softfloat-parts.c.inc"
1349
1350 #undef  N
1351 #undef  W
1352 #define N            256
1353
1354 #include "softfloat-parts-addsub.c.inc"
1355
1356 #undef  N
1357 #undef  W
1358 #undef  partsN
1359 #undef  FloatPartsN
1360 #undef  FloatPartsW
1361
1362 /*
1363  * Pack/unpack routines with a specific FloatFmt.
1364  */
1365
1366 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1367                                       float_status *s, const FloatFmt *params)
1368 {
1369     float16_unpack_raw(p, f);
1370     parts_canonicalize(p, s, params);
1371 }
1372
1373 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1374                                      float_status *s)
1375 {
1376     float16a_unpack_canonical(p, f, s, &float16_params);
1377 }
1378
1379 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1380                                       float_status *s)
1381 {
1382     bfloat16_unpack_raw(p, f);
1383     parts_canonicalize(p, s, &bfloat16_params);
1384 }
1385
1386 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1387                                              float_status *s,
1388                                              const FloatFmt *params)
1389 {
1390     parts_uncanon(p, s, params);
1391     return float16_pack_raw(p);
1392 }
1393
1394 static float16 float16_round_pack_canonical(FloatParts64 *p,
1395                                             float_status *s)
1396 {
1397     return float16a_round_pack_canonical(p, s, &float16_params);
1398 }
1399
1400 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1401                                               float_status *s)
1402 {
1403     parts_uncanon(p, s, &bfloat16_params);
1404     return bfloat16_pack_raw(p);
1405 }
1406
1407 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1408                                      float_status *s)
1409 {
1410     float32_unpack_raw(p, f);
1411     parts_canonicalize(p, s, &float32_params);
1412 }
1413
1414 static float32 float32_round_pack_canonical(FloatParts64 *p,
1415                                             float_status *s)
1416 {
1417     parts_uncanon(p, s, &float32_params);
1418     return float32_pack_raw(p);
1419 }
1420
1421 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1422                                      float_status *s)
1423 {
1424     float64_unpack_raw(p, f);
1425     parts_canonicalize(p, s, &float64_params);
1426 }
1427
1428 static float64 float64_round_pack_canonical(FloatParts64 *p,
1429                                             float_status *s)
1430 {
1431     parts_uncanon(p, s, &float64_params);
1432     return float64_pack_raw(p);
1433 }
1434
1435 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1436                                       float_status *s)
1437 {
1438     float128_unpack_raw(p, f);
1439     parts_canonicalize(p, s, &float128_params);
1440 }
1441
1442 static float128 float128_round_pack_canonical(FloatParts128 *p,
1443                                               float_status *s)
1444 {
1445     parts_uncanon(p, s, &float128_params);
1446     return float128_pack_raw(p);
1447 }
1448
1449 /*
1450  * Addition and subtraction
1451  */
1452
1453 static float16 QEMU_FLATTEN
1454 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1455 {
1456     FloatParts64 pa, pb, *pr;
1457
1458     float16_unpack_canonical(&pa, a, status);
1459     float16_unpack_canonical(&pb, b, status);
1460     pr = parts_addsub(&pa, &pb, status, subtract);
1461
1462     return float16_round_pack_canonical(pr, status);
1463 }
1464
1465 float16 float16_add(float16 a, float16 b, float_status *status)
1466 {
1467     return float16_addsub(a, b, status, false);
1468 }
1469
1470 float16 float16_sub(float16 a, float16 b, float_status *status)
1471 {
1472     return float16_addsub(a, b, status, true);
1473 }
1474
1475 static float32 QEMU_SOFTFLOAT_ATTR
1476 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1477 {
1478     FloatParts64 pa, pb, *pr;
1479
1480     float32_unpack_canonical(&pa, a, status);
1481     float32_unpack_canonical(&pb, b, status);
1482     pr = parts_addsub(&pa, &pb, status, subtract);
1483
1484     return float32_round_pack_canonical(pr, status);
1485 }
1486
1487 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1488 {
1489     return soft_f32_addsub(a, b, status, false);
1490 }
1491
1492 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1493 {
1494     return soft_f32_addsub(a, b, status, true);
1495 }
1496
1497 static float64 QEMU_SOFTFLOAT_ATTR
1498 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1499 {
1500     FloatParts64 pa, pb, *pr;
1501
1502     float64_unpack_canonical(&pa, a, status);
1503     float64_unpack_canonical(&pb, b, status);
1504     pr = parts_addsub(&pa, &pb, status, subtract);
1505
1506     return float64_round_pack_canonical(pr, status);
1507 }
1508
1509 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1510 {
1511     return soft_f64_addsub(a, b, status, false);
1512 }
1513
1514 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1515 {
1516     return soft_f64_addsub(a, b, status, true);
1517 }
1518
1519 static float hard_f32_add(float a, float b)
1520 {
1521     return a + b;
1522 }
1523
1524 static float hard_f32_sub(float a, float b)
1525 {
1526     return a - b;
1527 }
1528
1529 static double hard_f64_add(double a, double b)
1530 {
1531     return a + b;
1532 }
1533
1534 static double hard_f64_sub(double a, double b)
1535 {
1536     return a - b;
1537 }
1538
1539 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1540 {
1541     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1542         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1543     }
1544     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1545 }
1546
1547 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1548 {
1549     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1550         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1551     } else {
1552         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1553     }
1554 }
1555
1556 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1557                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1558 {
1559     return float32_gen2(a, b, s, hard, soft,
1560                         f32_is_zon2, f32_addsubmul_post);
1561 }
1562
1563 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1564                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1565 {
1566     return float64_gen2(a, b, s, hard, soft,
1567                         f64_is_zon2, f64_addsubmul_post);
1568 }
1569
1570 float32 QEMU_FLATTEN
1571 float32_add(float32 a, float32 b, float_status *s)
1572 {
1573     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1574 }
1575
1576 float32 QEMU_FLATTEN
1577 float32_sub(float32 a, float32 b, float_status *s)
1578 {
1579     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1580 }
1581
1582 float64 QEMU_FLATTEN
1583 float64_add(float64 a, float64 b, float_status *s)
1584 {
1585     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1586 }
1587
1588 float64 QEMU_FLATTEN
1589 float64_sub(float64 a, float64 b, float_status *s)
1590 {
1591     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1592 }
1593
1594 static bfloat16 QEMU_FLATTEN
1595 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1596 {
1597     FloatParts64 pa, pb, *pr;
1598
1599     bfloat16_unpack_canonical(&pa, a, status);
1600     bfloat16_unpack_canonical(&pb, b, status);
1601     pr = parts_addsub(&pa, &pb, status, subtract);
1602
1603     return bfloat16_round_pack_canonical(pr, status);
1604 }
1605
1606 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1607 {
1608     return bfloat16_addsub(a, b, status, false);
1609 }
1610
1611 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1612 {
1613     return bfloat16_addsub(a, b, status, true);
1614 }
1615
1616 static float128 QEMU_FLATTEN
1617 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1618 {
1619     FloatParts128 pa, pb, *pr;
1620
1621     float128_unpack_canonical(&pa, a, status);
1622     float128_unpack_canonical(&pb, b, status);
1623     pr = parts_addsub(&pa, &pb, status, subtract);
1624
1625     return float128_round_pack_canonical(pr, status);
1626 }
1627
1628 float128 float128_add(float128 a, float128 b, float_status *status)
1629 {
1630     return float128_addsub(a, b, status, false);
1631 }
1632
1633 float128 float128_sub(float128 a, float128 b, float_status *status)
1634 {
1635     return float128_addsub(a, b, status, true);
1636 }
1637
1638 /*
1639  * Multiplication
1640  */
1641
1642 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1643 {
1644     FloatParts64 pa, pb, *pr;
1645
1646     float16_unpack_canonical(&pa, a, status);
1647     float16_unpack_canonical(&pb, b, status);
1648     pr = parts_mul(&pa, &pb, status);
1649
1650     return float16_round_pack_canonical(pr, status);
1651 }
1652
1653 static float32 QEMU_SOFTFLOAT_ATTR
1654 soft_f32_mul(float32 a, float32 b, float_status *status)
1655 {
1656     FloatParts64 pa, pb, *pr;
1657
1658     float32_unpack_canonical(&pa, a, status);
1659     float32_unpack_canonical(&pb, b, status);
1660     pr = parts_mul(&pa, &pb, status);
1661
1662     return float32_round_pack_canonical(pr, status);
1663 }
1664
1665 static float64 QEMU_SOFTFLOAT_ATTR
1666 soft_f64_mul(float64 a, float64 b, float_status *status)
1667 {
1668     FloatParts64 pa, pb, *pr;
1669
1670     float64_unpack_canonical(&pa, a, status);
1671     float64_unpack_canonical(&pb, b, status);
1672     pr = parts_mul(&pa, &pb, status);
1673
1674     return float64_round_pack_canonical(pr, status);
1675 }
1676
1677 static float hard_f32_mul(float a, float b)
1678 {
1679     return a * b;
1680 }
1681
1682 static double hard_f64_mul(double a, double b)
1683 {
1684     return a * b;
1685 }
1686
1687 float32 QEMU_FLATTEN
1688 float32_mul(float32 a, float32 b, float_status *s)
1689 {
1690     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1691                         f32_is_zon2, f32_addsubmul_post);
1692 }
1693
1694 float64 QEMU_FLATTEN
1695 float64_mul(float64 a, float64 b, float_status *s)
1696 {
1697     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1698                         f64_is_zon2, f64_addsubmul_post);
1699 }
1700
1701 bfloat16 QEMU_FLATTEN
1702 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1703 {
1704     FloatParts64 pa, pb, *pr;
1705
1706     bfloat16_unpack_canonical(&pa, a, status);
1707     bfloat16_unpack_canonical(&pb, b, status);
1708     pr = parts_mul(&pa, &pb, status);
1709
1710     return bfloat16_round_pack_canonical(pr, status);
1711 }
1712
1713 float128 QEMU_FLATTEN
1714 float128_mul(float128 a, float128 b, float_status *status)
1715 {
1716     FloatParts128 pa, pb, *pr;
1717
1718     float128_unpack_canonical(&pa, a, status);
1719     float128_unpack_canonical(&pb, b, status);
1720     pr = parts_mul(&pa, &pb, status);
1721
1722     return float128_round_pack_canonical(pr, status);
1723 }
1724
1725 /*
1726  * Fused multiply-add
1727  */
1728
1729 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1730                                     int flags, float_status *status)
1731 {
1732     FloatParts64 pa, pb, pc, *pr;
1733
1734     float16_unpack_canonical(&pa, a, status);
1735     float16_unpack_canonical(&pb, b, status);
1736     float16_unpack_canonical(&pc, c, status);
1737     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1738
1739     return float16_round_pack_canonical(pr, status);
1740 }
1741
1742 static float32 QEMU_SOFTFLOAT_ATTR
1743 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1744                 float_status *status)
1745 {
1746     FloatParts64 pa, pb, pc, *pr;
1747
1748     float32_unpack_canonical(&pa, a, status);
1749     float32_unpack_canonical(&pb, b, status);
1750     float32_unpack_canonical(&pc, c, status);
1751     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1752
1753     return float32_round_pack_canonical(pr, status);
1754 }
1755
1756 static float64 QEMU_SOFTFLOAT_ATTR
1757 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1758                 float_status *status)
1759 {
1760     FloatParts64 pa, pb, pc, *pr;
1761
1762     float64_unpack_canonical(&pa, a, status);
1763     float64_unpack_canonical(&pb, b, status);
1764     float64_unpack_canonical(&pc, c, status);
1765     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1766
1767     return float64_round_pack_canonical(pr, status);
1768 }
1769
1770 static bool force_soft_fma;
1771
1772 float32 QEMU_FLATTEN
1773 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1774 {
1775     union_float32 ua, ub, uc, ur;
1776
1777     ua.s = xa;
1778     ub.s = xb;
1779     uc.s = xc;
1780
1781     if (unlikely(!can_use_fpu(s))) {
1782         goto soft;
1783     }
1784     if (unlikely(flags & float_muladd_halve_result)) {
1785         goto soft;
1786     }
1787
1788     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1789     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1790         goto soft;
1791     }
1792
1793     if (unlikely(force_soft_fma)) {
1794         goto soft;
1795     }
1796
1797     /*
1798      * When (a || b) == 0, there's no need to check for under/over flow,
1799      * since we know the addend is (normal || 0) and the product is 0.
1800      */
1801     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1802         union_float32 up;
1803         bool prod_sign;
1804
1805         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1806         prod_sign ^= !!(flags & float_muladd_negate_product);
1807         up.s = float32_set_sign(float32_zero, prod_sign);
1808
1809         if (flags & float_muladd_negate_c) {
1810             uc.h = -uc.h;
1811         }
1812         ur.h = up.h + uc.h;
1813     } else {
1814         union_float32 ua_orig = ua;
1815         union_float32 uc_orig = uc;
1816
1817         if (flags & float_muladd_negate_product) {
1818             ua.h = -ua.h;
1819         }
1820         if (flags & float_muladd_negate_c) {
1821             uc.h = -uc.h;
1822         }
1823
1824         ur.h = fmaf(ua.h, ub.h, uc.h);
1825
1826         if (unlikely(f32_is_inf(ur))) {
1827             float_raise(float_flag_overflow, s);
1828         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
1829             ua = ua_orig;
1830             uc = uc_orig;
1831             goto soft;
1832         }
1833     }
1834     if (flags & float_muladd_negate_result) {
1835         return float32_chs(ur.s);
1836     }
1837     return ur.s;
1838
1839  soft:
1840     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1841 }
1842
1843 float64 QEMU_FLATTEN
1844 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1845 {
1846     union_float64 ua, ub, uc, ur;
1847
1848     ua.s = xa;
1849     ub.s = xb;
1850     uc.s = xc;
1851
1852     if (unlikely(!can_use_fpu(s))) {
1853         goto soft;
1854     }
1855     if (unlikely(flags & float_muladd_halve_result)) {
1856         goto soft;
1857     }
1858
1859     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1860     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1861         goto soft;
1862     }
1863
1864     if (unlikely(force_soft_fma)) {
1865         goto soft;
1866     }
1867
1868     /*
1869      * When (a || b) == 0, there's no need to check for under/over flow,
1870      * since we know the addend is (normal || 0) and the product is 0.
1871      */
1872     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1873         union_float64 up;
1874         bool prod_sign;
1875
1876         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1877         prod_sign ^= !!(flags & float_muladd_negate_product);
1878         up.s = float64_set_sign(float64_zero, prod_sign);
1879
1880         if (flags & float_muladd_negate_c) {
1881             uc.h = -uc.h;
1882         }
1883         ur.h = up.h + uc.h;
1884     } else {
1885         union_float64 ua_orig = ua;
1886         union_float64 uc_orig = uc;
1887
1888         if (flags & float_muladd_negate_product) {
1889             ua.h = -ua.h;
1890         }
1891         if (flags & float_muladd_negate_c) {
1892             uc.h = -uc.h;
1893         }
1894
1895         ur.h = fma(ua.h, ub.h, uc.h);
1896
1897         if (unlikely(f64_is_inf(ur))) {
1898             float_raise(float_flag_overflow, s);
1899         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1900             ua = ua_orig;
1901             uc = uc_orig;
1902             goto soft;
1903         }
1904     }
1905     if (flags & float_muladd_negate_result) {
1906         return float64_chs(ur.s);
1907     }
1908     return ur.s;
1909
1910  soft:
1911     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1912 }
1913
1914 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1915                                       int flags, float_status *status)
1916 {
1917     FloatParts64 pa, pb, pc, *pr;
1918
1919     bfloat16_unpack_canonical(&pa, a, status);
1920     bfloat16_unpack_canonical(&pb, b, status);
1921     bfloat16_unpack_canonical(&pc, c, status);
1922     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1923
1924     return bfloat16_round_pack_canonical(pr, status);
1925 }
1926
1927 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1928                                       int flags, float_status *status)
1929 {
1930     FloatParts128 pa, pb, pc, *pr;
1931
1932     float128_unpack_canonical(&pa, a, status);
1933     float128_unpack_canonical(&pb, b, status);
1934     float128_unpack_canonical(&pc, c, status);
1935     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1936
1937     return float128_round_pack_canonical(pr, status);
1938 }
1939
1940 /*
1941  * Division
1942  */
1943
1944 float16 float16_div(float16 a, float16 b, float_status *status)
1945 {
1946     FloatParts64 pa, pb, *pr;
1947
1948     float16_unpack_canonical(&pa, a, status);
1949     float16_unpack_canonical(&pb, b, status);
1950     pr = parts_div(&pa, &pb, status);
1951
1952     return float16_round_pack_canonical(pr, status);
1953 }
1954
1955 static float32 QEMU_SOFTFLOAT_ATTR
1956 soft_f32_div(float32 a, float32 b, float_status *status)
1957 {
1958     FloatParts64 pa, pb, *pr;
1959
1960     float32_unpack_canonical(&pa, a, status);
1961     float32_unpack_canonical(&pb, b, status);
1962     pr = parts_div(&pa, &pb, status);
1963
1964     return float32_round_pack_canonical(pr, status);
1965 }
1966
1967 static float64 QEMU_SOFTFLOAT_ATTR
1968 soft_f64_div(float64 a, float64 b, float_status *status)
1969 {
1970     FloatParts64 pa, pb, *pr;
1971
1972     float64_unpack_canonical(&pa, a, status);
1973     float64_unpack_canonical(&pb, b, status);
1974     pr = parts_div(&pa, &pb, status);
1975
1976     return float64_round_pack_canonical(pr, status);
1977 }
1978
1979 static float hard_f32_div(float a, float b)
1980 {
1981     return a / b;
1982 }
1983
1984 static double hard_f64_div(double a, double b)
1985 {
1986     return a / b;
1987 }
1988
1989 static bool f32_div_pre(union_float32 a, union_float32 b)
1990 {
1991     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1992         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1993                fpclassify(b.h) == FP_NORMAL;
1994     }
1995     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1996 }
1997
1998 static bool f64_div_pre(union_float64 a, union_float64 b)
1999 {
2000     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2001         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2002                fpclassify(b.h) == FP_NORMAL;
2003     }
2004     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2005 }
2006
2007 static bool f32_div_post(union_float32 a, union_float32 b)
2008 {
2009     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2010         return fpclassify(a.h) != FP_ZERO;
2011     }
2012     return !float32_is_zero(a.s);
2013 }
2014
2015 static bool f64_div_post(union_float64 a, union_float64 b)
2016 {
2017     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2018         return fpclassify(a.h) != FP_ZERO;
2019     }
2020     return !float64_is_zero(a.s);
2021 }
2022
2023 float32 QEMU_FLATTEN
2024 float32_div(float32 a, float32 b, float_status *s)
2025 {
2026     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2027                         f32_div_pre, f32_div_post);
2028 }
2029
2030 float64 QEMU_FLATTEN
2031 float64_div(float64 a, float64 b, float_status *s)
2032 {
2033     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2034                         f64_div_pre, f64_div_post);
2035 }
2036
2037 bfloat16 QEMU_FLATTEN
2038 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2039 {
2040     FloatParts64 pa, pb, *pr;
2041
2042     bfloat16_unpack_canonical(&pa, a, status);
2043     bfloat16_unpack_canonical(&pb, b, status);
2044     pr = parts_div(&pa, &pb, status);
2045
2046     return bfloat16_round_pack_canonical(pr, status);
2047 }
2048
2049 float128 QEMU_FLATTEN
2050 float128_div(float128 a, float128 b, float_status *status)
2051 {
2052     FloatParts128 pa, pb, *pr;
2053
2054     float128_unpack_canonical(&pa, a, status);
2055     float128_unpack_canonical(&pb, b, status);
2056     pr = parts_div(&pa, &pb, status);
2057
2058     return float128_round_pack_canonical(pr, status);
2059 }
2060
2061 /*
2062  * Float to Float conversions
2063  *
2064  * Returns the result of converting one float format to another. The
2065  * conversion is performed according to the IEC/IEEE Standard for
2066  * Binary Floating-Point Arithmetic.
2067  *
2068  * Usually this only needs to take care of raising invalid exceptions
2069  * and handling the conversion on NaNs.
2070  */
2071
2072 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2073 {
2074     switch (a->cls) {
2075     case float_class_qnan:
2076     case float_class_snan:
2077         /*
2078          * There is no NaN in the destination format.  Raise Invalid
2079          * and return a zero with the sign of the input NaN.
2080          */
2081         float_raise(float_flag_invalid, s);
2082         a->cls = float_class_zero;
2083         break;
2084
2085     case float_class_inf:
2086         /*
2087          * There is no Inf in the destination format.  Raise Invalid
2088          * and return the maximum normal with the correct sign.
2089          */
2090         float_raise(float_flag_invalid, s);
2091         a->cls = float_class_normal;
2092         a->exp = float16_params_ahp.exp_max;
2093         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2094                                   float16_params_ahp.frac_size + 1);
2095         break;
2096
2097     case float_class_normal:
2098     case float_class_zero:
2099         break;
2100
2101     default:
2102         g_assert_not_reached();
2103     }
2104 }
2105
2106 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2107 {
2108     if (is_nan(a->cls)) {
2109         parts_return_nan(a, s);
2110     }
2111 }
2112
2113 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2114 {
2115     if (is_nan(a->cls)) {
2116         parts_return_nan(a, s);
2117     }
2118 }
2119
2120 #define parts_float_to_float(P, S) \
2121     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2122
2123 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2124                                         float_status *s)
2125 {
2126     a->cls = b->cls;
2127     a->sign = b->sign;
2128     a->exp = b->exp;
2129
2130     if (a->cls == float_class_normal) {
2131         frac_truncjam(a, b);
2132     } else if (is_nan(a->cls)) {
2133         /* Discard the low bits of the NaN. */
2134         a->frac = b->frac_hi;
2135         parts_return_nan(a, s);
2136     }
2137 }
2138
2139 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2140                                        float_status *s)
2141 {
2142     a->cls = b->cls;
2143     a->sign = b->sign;
2144     a->exp = b->exp;
2145     frac_widen(a, b);
2146
2147     if (is_nan(a->cls)) {
2148         parts_return_nan(a, s);
2149     }
2150 }
2151
2152 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2153 {
2154     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2155     FloatParts64 p;
2156
2157     float16a_unpack_canonical(&p, a, s, fmt16);
2158     parts_float_to_float(&p, s);
2159     return float32_round_pack_canonical(&p, s);
2160 }
2161
2162 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2163 {
2164     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2165     FloatParts64 p;
2166
2167     float16a_unpack_canonical(&p, a, s, fmt16);
2168     parts_float_to_float(&p, s);
2169     return float64_round_pack_canonical(&p, s);
2170 }
2171
2172 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2173 {
2174     FloatParts64 p;
2175     const FloatFmt *fmt;
2176
2177     float32_unpack_canonical(&p, a, s);
2178     if (ieee) {
2179         parts_float_to_float(&p, s);
2180         fmt = &float16_params;
2181     } else {
2182         parts_float_to_ahp(&p, s);
2183         fmt = &float16_params_ahp;
2184     }
2185     return float16a_round_pack_canonical(&p, s, fmt);
2186 }
2187
2188 static float64 QEMU_SOFTFLOAT_ATTR
2189 soft_float32_to_float64(float32 a, float_status *s)
2190 {
2191     FloatParts64 p;
2192
2193     float32_unpack_canonical(&p, a, s);
2194     parts_float_to_float(&p, s);
2195     return float64_round_pack_canonical(&p, s);
2196 }
2197
2198 float64 float32_to_float64(float32 a, float_status *s)
2199 {
2200     if (likely(float32_is_normal(a))) {
2201         /* Widening conversion can never produce inexact results.  */
2202         union_float32 uf;
2203         union_float64 ud;
2204         uf.s = a;
2205         ud.h = uf.h;
2206         return ud.s;
2207     } else if (float32_is_zero(a)) {
2208         return float64_set_sign(float64_zero, float32_is_neg(a));
2209     } else {
2210         return soft_float32_to_float64(a, s);
2211     }
2212 }
2213
2214 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2215 {
2216     FloatParts64 p;
2217     const FloatFmt *fmt;
2218
2219     float64_unpack_canonical(&p, a, s);
2220     if (ieee) {
2221         parts_float_to_float(&p, s);
2222         fmt = &float16_params;
2223     } else {
2224         parts_float_to_ahp(&p, s);
2225         fmt = &float16_params_ahp;
2226     }
2227     return float16a_round_pack_canonical(&p, s, fmt);
2228 }
2229
2230 float32 float64_to_float32(float64 a, float_status *s)
2231 {
2232     FloatParts64 p;
2233
2234     float64_unpack_canonical(&p, a, s);
2235     parts_float_to_float(&p, s);
2236     return float32_round_pack_canonical(&p, s);
2237 }
2238
2239 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2240 {
2241     FloatParts64 p;
2242
2243     bfloat16_unpack_canonical(&p, a, s);
2244     parts_float_to_float(&p, s);
2245     return float32_round_pack_canonical(&p, s);
2246 }
2247
2248 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2249 {
2250     FloatParts64 p;
2251
2252     bfloat16_unpack_canonical(&p, a, s);
2253     parts_float_to_float(&p, s);
2254     return float64_round_pack_canonical(&p, s);
2255 }
2256
2257 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2258 {
2259     FloatParts64 p;
2260
2261     float32_unpack_canonical(&p, a, s);
2262     parts_float_to_float(&p, s);
2263     return bfloat16_round_pack_canonical(&p, s);
2264 }
2265
2266 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2267 {
2268     FloatParts64 p;
2269
2270     float64_unpack_canonical(&p, a, s);
2271     parts_float_to_float(&p, s);
2272     return bfloat16_round_pack_canonical(&p, s);
2273 }
2274
2275 float32 float128_to_float32(float128 a, float_status *s)
2276 {
2277     FloatParts64 p64;
2278     FloatParts128 p128;
2279
2280     float128_unpack_canonical(&p128, a, s);
2281     parts_float_to_float_narrow(&p64, &p128, s);
2282     return float32_round_pack_canonical(&p64, s);
2283 }
2284
2285 float64 float128_to_float64(float128 a, float_status *s)
2286 {
2287     FloatParts64 p64;
2288     FloatParts128 p128;
2289
2290     float128_unpack_canonical(&p128, a, s);
2291     parts_float_to_float_narrow(&p64, &p128, s);
2292     return float64_round_pack_canonical(&p64, s);
2293 }
2294
2295 float128 float32_to_float128(float32 a, float_status *s)
2296 {
2297     FloatParts64 p64;
2298     FloatParts128 p128;
2299
2300     float32_unpack_canonical(&p64, a, s);
2301     parts_float_to_float_widen(&p128, &p64, s);
2302     return float128_round_pack_canonical(&p128, s);
2303 }
2304
2305 float128 float64_to_float128(float64 a, float_status *s)
2306 {
2307     FloatParts64 p64;
2308     FloatParts128 p128;
2309
2310     float64_unpack_canonical(&p64, a, s);
2311     parts_float_to_float_widen(&p128, &p64, s);
2312     return float128_round_pack_canonical(&p128, s);
2313 }
2314
2315 /*
2316  * Round to integral value
2317  */
2318
2319 float16 float16_round_to_int(float16 a, float_status *s)
2320 {
2321     FloatParts64 p;
2322
2323     float16_unpack_canonical(&p, a, s);
2324     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2325     return float16_round_pack_canonical(&p, s);
2326 }
2327
2328 float32 float32_round_to_int(float32 a, float_status *s)
2329 {
2330     FloatParts64 p;
2331
2332     float32_unpack_canonical(&p, a, s);
2333     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2334     return float32_round_pack_canonical(&p, s);
2335 }
2336
2337 float64 float64_round_to_int(float64 a, float_status *s)
2338 {
2339     FloatParts64 p;
2340
2341     float64_unpack_canonical(&p, a, s);
2342     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2343     return float64_round_pack_canonical(&p, s);
2344 }
2345
2346 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2347 {
2348     FloatParts64 p;
2349
2350     bfloat16_unpack_canonical(&p, a, s);
2351     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2352     return bfloat16_round_pack_canonical(&p, s);
2353 }
2354
2355 float128 float128_round_to_int(float128 a, float_status *s)
2356 {
2357     FloatParts128 p;
2358
2359     float128_unpack_canonical(&p, a, s);
2360     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2361     return float128_round_pack_canonical(&p, s);
2362 }
2363
2364 /*
2365  * Floating-point to signed integer conversions
2366  */
2367
2368 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2369                               float_status *s)
2370 {
2371     FloatParts64 p;
2372
2373     float16_unpack_canonical(&p, a, s);
2374     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2375 }
2376
2377 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2378                                 float_status *s)
2379 {
2380     FloatParts64 p;
2381
2382     float16_unpack_canonical(&p, a, s);
2383     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2384 }
2385
2386 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2387                                 float_status *s)
2388 {
2389     FloatParts64 p;
2390
2391     float16_unpack_canonical(&p, a, s);
2392     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2393 }
2394
2395 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2396                                 float_status *s)
2397 {
2398     FloatParts64 p;
2399
2400     float16_unpack_canonical(&p, a, s);
2401     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2402 }
2403
2404 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2405                                 float_status *s)
2406 {
2407     FloatParts64 p;
2408
2409     float32_unpack_canonical(&p, a, s);
2410     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2411 }
2412
2413 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2414                                 float_status *s)
2415 {
2416     FloatParts64 p;
2417
2418     float32_unpack_canonical(&p, a, s);
2419     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2420 }
2421
2422 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2423                                 float_status *s)
2424 {
2425     FloatParts64 p;
2426
2427     float32_unpack_canonical(&p, a, s);
2428     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2429 }
2430
2431 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2432                                 float_status *s)
2433 {
2434     FloatParts64 p;
2435
2436     float64_unpack_canonical(&p, a, s);
2437     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2438 }
2439
2440 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2441                                 float_status *s)
2442 {
2443     FloatParts64 p;
2444
2445     float64_unpack_canonical(&p, a, s);
2446     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2447 }
2448
2449 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2450                                 float_status *s)
2451 {
2452     FloatParts64 p;
2453
2454     float64_unpack_canonical(&p, a, s);
2455     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2456 }
2457
2458 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2459                                  float_status *s)
2460 {
2461     FloatParts64 p;
2462
2463     bfloat16_unpack_canonical(&p, a, s);
2464     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2465 }
2466
2467 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2468                                  float_status *s)
2469 {
2470     FloatParts64 p;
2471
2472     bfloat16_unpack_canonical(&p, a, s);
2473     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2474 }
2475
2476 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2477                                  float_status *s)
2478 {
2479     FloatParts64 p;
2480
2481     bfloat16_unpack_canonical(&p, a, s);
2482     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2483 }
2484
2485 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2486                                         int scale, float_status *s)
2487 {
2488     FloatParts128 p;
2489
2490     float128_unpack_canonical(&p, a, s);
2491     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2492 }
2493
2494 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2495                                         int scale, float_status *s)
2496 {
2497     FloatParts128 p;
2498
2499     float128_unpack_canonical(&p, a, s);
2500     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2501 }
2502
2503 int8_t float16_to_int8(float16 a, float_status *s)
2504 {
2505     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2506 }
2507
2508 int16_t float16_to_int16(float16 a, float_status *s)
2509 {
2510     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2511 }
2512
2513 int32_t float16_to_int32(float16 a, float_status *s)
2514 {
2515     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2516 }
2517
2518 int64_t float16_to_int64(float16 a, float_status *s)
2519 {
2520     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2521 }
2522
2523 int16_t float32_to_int16(float32 a, float_status *s)
2524 {
2525     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2526 }
2527
2528 int32_t float32_to_int32(float32 a, float_status *s)
2529 {
2530     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2531 }
2532
2533 int64_t float32_to_int64(float32 a, float_status *s)
2534 {
2535     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2536 }
2537
2538 int16_t float64_to_int16(float64 a, float_status *s)
2539 {
2540     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2541 }
2542
2543 int32_t float64_to_int32(float64 a, float_status *s)
2544 {
2545     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2546 }
2547
2548 int64_t float64_to_int64(float64 a, float_status *s)
2549 {
2550     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2551 }
2552
2553 int32_t float128_to_int32(float128 a, float_status *s)
2554 {
2555     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2556 }
2557
2558 int64_t float128_to_int64(float128 a, float_status *s)
2559 {
2560     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2561 }
2562
2563 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2564 {
2565     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2566 }
2567
2568 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2569 {
2570     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2571 }
2572
2573 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2574 {
2575     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2576 }
2577
2578 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2579 {
2580     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2581 }
2582
2583 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2584 {
2585     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2586 }
2587
2588 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2589 {
2590     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2591 }
2592
2593 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2594 {
2595     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2596 }
2597
2598 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2599 {
2600     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2601 }
2602
2603 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2604 {
2605     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2606 }
2607
2608 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
2609 {
2610     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
2611 }
2612
2613 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
2614 {
2615     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
2616 }
2617
2618 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2619 {
2620     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2621 }
2622
2623 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2624 {
2625     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2626 }
2627
2628 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2629 {
2630     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2631 }
2632
2633 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2634 {
2635     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2636 }
2637
2638 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2639 {
2640     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2641 }
2642
2643 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2644 {
2645     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2646 }
2647
2648 /*
2649  *  Returns the result of converting the floating-point value `a' to
2650  *  the unsigned integer format. The conversion is performed according
2651  *  to the IEC/IEEE Standard for Binary Floating-Point
2652  *  Arithmetic---which means in particular that the conversion is
2653  *  rounded according to the current rounding mode. If `a' is a NaN,
2654  *  the largest unsigned integer is returned. Otherwise, if the
2655  *  conversion overflows, the largest unsigned integer is returned. If
2656  *  the 'a' is negative, the result is rounded and zero is returned;
2657  *  values that do not round to zero will raise the inexact exception
2658  *  flag.
2659  */
2660
2661 static uint64_t round_to_uint_and_pack(FloatParts64 p, FloatRoundMode rmode,
2662                                        int scale, uint64_t max,
2663                                        float_status *s)
2664 {
2665     int flags = 0;
2666     uint64_t r;
2667
2668     switch (p.cls) {
2669     case float_class_snan:
2670     case float_class_qnan:
2671         flags = float_flag_invalid;
2672         r = max;
2673         break;
2674
2675     case float_class_inf:
2676         flags = float_flag_invalid;
2677         r = p.sign ? 0 : max;
2678         break;
2679
2680     case float_class_zero:
2681         return 0;
2682
2683     case float_class_normal:
2684         /* TODO: 62 = N - 2, frac_size for rounding */
2685         if (parts_round_to_int_normal(&p, rmode, scale, 62)) {
2686             flags = float_flag_inexact;
2687             if (p.cls == float_class_zero) {
2688                 r = 0;
2689                 break;
2690             }
2691         }
2692
2693         if (p.sign) {
2694             flags = float_flag_invalid;
2695             r = 0;
2696         } else if (p.exp > DECOMPOSED_BINARY_POINT) {
2697             flags = float_flag_invalid;
2698             r = max;
2699         } else {
2700             r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2701             if (r > max) {
2702                 flags = float_flag_invalid;
2703                 r = max;
2704             }
2705         }
2706         break;
2707
2708     default:
2709         g_assert_not_reached();
2710     }
2711
2712     float_raise(flags, s);
2713     return r;
2714 }
2715
2716 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2717                                 float_status *s)
2718 {
2719     FloatParts64 p;
2720
2721     float16_unpack_canonical(&p, a, s);
2722     return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
2723 }
2724
2725 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2726                                   float_status *s)
2727 {
2728     FloatParts64 p;
2729
2730     float16_unpack_canonical(&p, a, s);
2731     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2732 }
2733
2734 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2735                                   float_status *s)
2736 {
2737     FloatParts64 p;
2738
2739     float16_unpack_canonical(&p, a, s);
2740     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2741 }
2742
2743 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2744                                   float_status *s)
2745 {
2746     FloatParts64 p;
2747
2748     float16_unpack_canonical(&p, a, s);
2749     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2750 }
2751
2752 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2753                                   float_status *s)
2754 {
2755     FloatParts64 p;
2756
2757     float32_unpack_canonical(&p, a, s);
2758     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2759 }
2760
2761 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2762                                   float_status *s)
2763 {
2764     FloatParts64 p;
2765
2766     float32_unpack_canonical(&p, a, s);
2767     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2768 }
2769
2770 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2771                                   float_status *s)
2772 {
2773     FloatParts64 p;
2774
2775     float32_unpack_canonical(&p, a, s);
2776     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2777 }
2778
2779 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2780                                   float_status *s)
2781 {
2782     FloatParts64 p;
2783
2784     float64_unpack_canonical(&p, a, s);
2785     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2786 }
2787
2788 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2789                                   float_status *s)
2790 {
2791     FloatParts64 p;
2792
2793     float64_unpack_canonical(&p, a, s);
2794     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2795 }
2796
2797 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2798                                   float_status *s)
2799 {
2800     FloatParts64 p;
2801
2802     float64_unpack_canonical(&p, a, s);
2803     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2804 }
2805
2806 uint8_t float16_to_uint8(float16 a, float_status *s)
2807 {
2808     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2809 }
2810
2811 uint16_t float16_to_uint16(float16 a, float_status *s)
2812 {
2813     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2814 }
2815
2816 uint32_t float16_to_uint32(float16 a, float_status *s)
2817 {
2818     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2819 }
2820
2821 uint64_t float16_to_uint64(float16 a, float_status *s)
2822 {
2823     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2824 }
2825
2826 uint16_t float32_to_uint16(float32 a, float_status *s)
2827 {
2828     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2829 }
2830
2831 uint32_t float32_to_uint32(float32 a, float_status *s)
2832 {
2833     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2834 }
2835
2836 uint64_t float32_to_uint64(float32 a, float_status *s)
2837 {
2838     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2839 }
2840
2841 uint16_t float64_to_uint16(float64 a, float_status *s)
2842 {
2843     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2844 }
2845
2846 uint32_t float64_to_uint32(float64 a, float_status *s)
2847 {
2848     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2849 }
2850
2851 uint64_t float64_to_uint64(float64 a, float_status *s)
2852 {
2853     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2854 }
2855
2856 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2857 {
2858     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2859 }
2860
2861 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2862 {
2863     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2864 }
2865
2866 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2867 {
2868     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2869 }
2870
2871 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2872 {
2873     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2874 }
2875
2876 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2877 {
2878     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2879 }
2880
2881 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2882 {
2883     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2884 }
2885
2886 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2887 {
2888     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2889 }
2890
2891 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2892 {
2893     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2894 }
2895
2896 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2897 {
2898     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2899 }
2900
2901 /*
2902  *  Returns the result of converting the bfloat16 value `a' to
2903  *  the unsigned integer format.
2904  */
2905
2906 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2907                                    int scale, float_status *s)
2908 {
2909     FloatParts64 p;
2910
2911     bfloat16_unpack_canonical(&p, a, s);
2912     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2913 }
2914
2915 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2916                                    int scale, float_status *s)
2917 {
2918     FloatParts64 p;
2919
2920     bfloat16_unpack_canonical(&p, a, s);
2921     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2922 }
2923
2924 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2925                                    int scale, float_status *s)
2926 {
2927     FloatParts64 p;
2928
2929     bfloat16_unpack_canonical(&p, a, s);
2930     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2931 }
2932
2933 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2934 {
2935     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2936 }
2937
2938 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2939 {
2940     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2941 }
2942
2943 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2944 {
2945     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2946 }
2947
2948 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2949 {
2950     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2951 }
2952
2953 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2954 {
2955     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2956 }
2957
2958 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2959 {
2960     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2961 }
2962
2963 /*
2964  * Integer to float conversions
2965  *
2966  * Returns the result of converting the two's complement integer `a'
2967  * to the floating-point format. The conversion is performed according
2968  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2969  */
2970
2971 static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
2972 {
2973     FloatParts64 r = { .sign = false };
2974
2975     if (a == 0) {
2976         r.cls = float_class_zero;
2977     } else {
2978         uint64_t f = a;
2979         int shift;
2980
2981         r.cls = float_class_normal;
2982         if (a < 0) {
2983             f = -f;
2984             r.sign = true;
2985         }
2986         shift = clz64(f);
2987         scale = MIN(MAX(scale, -0x10000), 0x10000);
2988
2989         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2990         r.frac = f << shift;
2991     }
2992
2993     return r;
2994 }
2995
2996 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2997 {
2998     FloatParts64 pa = int_to_float(a, scale, status);
2999     return float16_round_pack_canonical(&pa, status);
3000 }
3001
3002 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3003 {
3004     return int64_to_float16_scalbn(a, scale, status);
3005 }
3006
3007 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3008 {
3009     return int64_to_float16_scalbn(a, scale, status);
3010 }
3011
3012 float16 int64_to_float16(int64_t a, float_status *status)
3013 {
3014     return int64_to_float16_scalbn(a, 0, status);
3015 }
3016
3017 float16 int32_to_float16(int32_t a, float_status *status)
3018 {
3019     return int64_to_float16_scalbn(a, 0, status);
3020 }
3021
3022 float16 int16_to_float16(int16_t a, float_status *status)
3023 {
3024     return int64_to_float16_scalbn(a, 0, status);
3025 }
3026
3027 float16 int8_to_float16(int8_t a, float_status *status)
3028 {
3029     return int64_to_float16_scalbn(a, 0, status);
3030 }
3031
3032 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3033 {
3034     FloatParts64 pa = int_to_float(a, scale, status);
3035     return float32_round_pack_canonical(&pa, status);
3036 }
3037
3038 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3039 {
3040     return int64_to_float32_scalbn(a, scale, status);
3041 }
3042
3043 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3044 {
3045     return int64_to_float32_scalbn(a, scale, status);
3046 }
3047
3048 float32 int64_to_float32(int64_t a, float_status *status)
3049 {
3050     return int64_to_float32_scalbn(a, 0, status);
3051 }
3052
3053 float32 int32_to_float32(int32_t a, float_status *status)
3054 {
3055     return int64_to_float32_scalbn(a, 0, status);
3056 }
3057
3058 float32 int16_to_float32(int16_t a, float_status *status)
3059 {
3060     return int64_to_float32_scalbn(a, 0, status);
3061 }
3062
3063 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3064 {
3065     FloatParts64 pa = int_to_float(a, scale, status);
3066     return float64_round_pack_canonical(&pa, status);
3067 }
3068
3069 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3070 {
3071     return int64_to_float64_scalbn(a, scale, status);
3072 }
3073
3074 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3075 {
3076     return int64_to_float64_scalbn(a, scale, status);
3077 }
3078
3079 float64 int64_to_float64(int64_t a, float_status *status)
3080 {
3081     return int64_to_float64_scalbn(a, 0, status);
3082 }
3083
3084 float64 int32_to_float64(int32_t a, float_status *status)
3085 {
3086     return int64_to_float64_scalbn(a, 0, status);
3087 }
3088
3089 float64 int16_to_float64(int16_t a, float_status *status)
3090 {
3091     return int64_to_float64_scalbn(a, 0, status);
3092 }
3093
3094 /*
3095  * Returns the result of converting the two's complement integer `a'
3096  * to the bfloat16 format.
3097  */
3098
3099 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3100 {
3101     FloatParts64 pa = int_to_float(a, scale, status);
3102     return bfloat16_round_pack_canonical(&pa, status);
3103 }
3104
3105 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3106 {
3107     return int64_to_bfloat16_scalbn(a, scale, status);
3108 }
3109
3110 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3111 {
3112     return int64_to_bfloat16_scalbn(a, scale, status);
3113 }
3114
3115 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3116 {
3117     return int64_to_bfloat16_scalbn(a, 0, status);
3118 }
3119
3120 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3121 {
3122     return int64_to_bfloat16_scalbn(a, 0, status);
3123 }
3124
3125 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3126 {
3127     return int64_to_bfloat16_scalbn(a, 0, status);
3128 }
3129
3130 /*
3131  * Unsigned Integer to float conversions
3132  *
3133  * Returns the result of converting the unsigned integer `a' to the
3134  * floating-point format. The conversion is performed according to the
3135  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3136  */
3137
3138 static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
3139 {
3140     FloatParts64 r = { .sign = false };
3141     int shift;
3142
3143     if (a == 0) {
3144         r.cls = float_class_zero;
3145     } else {
3146         scale = MIN(MAX(scale, -0x10000), 0x10000);
3147         shift = clz64(a);
3148         r.cls = float_class_normal;
3149         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3150         r.frac = a << shift;
3151     }
3152
3153     return r;
3154 }
3155
3156 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3157 {
3158     FloatParts64 pa = uint_to_float(a, scale, status);
3159     return float16_round_pack_canonical(&pa, status);
3160 }
3161
3162 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3163 {
3164     return uint64_to_float16_scalbn(a, scale, status);
3165 }
3166
3167 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3168 {
3169     return uint64_to_float16_scalbn(a, scale, status);
3170 }
3171
3172 float16 uint64_to_float16(uint64_t a, float_status *status)
3173 {
3174     return uint64_to_float16_scalbn(a, 0, status);
3175 }
3176
3177 float16 uint32_to_float16(uint32_t a, float_status *status)
3178 {
3179     return uint64_to_float16_scalbn(a, 0, status);
3180 }
3181
3182 float16 uint16_to_float16(uint16_t a, float_status *status)
3183 {
3184     return uint64_to_float16_scalbn(a, 0, status);
3185 }
3186
3187 float16 uint8_to_float16(uint8_t a, float_status *status)
3188 {
3189     return uint64_to_float16_scalbn(a, 0, status);
3190 }
3191
3192 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3193 {
3194     FloatParts64 pa = uint_to_float(a, scale, status);
3195     return float32_round_pack_canonical(&pa, status);
3196 }
3197
3198 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3199 {
3200     return uint64_to_float32_scalbn(a, scale, status);
3201 }
3202
3203 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3204 {
3205     return uint64_to_float32_scalbn(a, scale, status);
3206 }
3207
3208 float32 uint64_to_float32(uint64_t a, float_status *status)
3209 {
3210     return uint64_to_float32_scalbn(a, 0, status);
3211 }
3212
3213 float32 uint32_to_float32(uint32_t a, float_status *status)
3214 {
3215     return uint64_to_float32_scalbn(a, 0, status);
3216 }
3217
3218 float32 uint16_to_float32(uint16_t a, float_status *status)
3219 {
3220     return uint64_to_float32_scalbn(a, 0, status);
3221 }
3222
3223 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3224 {
3225     FloatParts64 pa = uint_to_float(a, scale, status);
3226     return float64_round_pack_canonical(&pa, status);
3227 }
3228
3229 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3230 {
3231     return uint64_to_float64_scalbn(a, scale, status);
3232 }
3233
3234 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3235 {
3236     return uint64_to_float64_scalbn(a, scale, status);
3237 }
3238
3239 float64 uint64_to_float64(uint64_t a, float_status *status)
3240 {
3241     return uint64_to_float64_scalbn(a, 0, status);
3242 }
3243
3244 float64 uint32_to_float64(uint32_t a, float_status *status)
3245 {
3246     return uint64_to_float64_scalbn(a, 0, status);
3247 }
3248
3249 float64 uint16_to_float64(uint16_t a, float_status *status)
3250 {
3251     return uint64_to_float64_scalbn(a, 0, status);
3252 }
3253
3254 /*
3255  * Returns the result of converting the unsigned integer `a' to the
3256  * bfloat16 format.
3257  */
3258
3259 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3260 {
3261     FloatParts64 pa = uint_to_float(a, scale, status);
3262     return bfloat16_round_pack_canonical(&pa, status);
3263 }
3264
3265 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3266 {
3267     return uint64_to_bfloat16_scalbn(a, scale, status);
3268 }
3269
3270 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3271 {
3272     return uint64_to_bfloat16_scalbn(a, scale, status);
3273 }
3274
3275 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3276 {
3277     return uint64_to_bfloat16_scalbn(a, 0, status);
3278 }
3279
3280 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3281 {
3282     return uint64_to_bfloat16_scalbn(a, 0, status);
3283 }
3284
3285 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3286 {
3287     return uint64_to_bfloat16_scalbn(a, 0, status);
3288 }
3289
3290 /* Float Min/Max */
3291 /* min() and max() functions. These can't be implemented as
3292  * 'compare and pick one input' because that would mishandle
3293  * NaNs and +0 vs -0.
3294  *
3295  * minnum() and maxnum() functions. These are similar to the min()
3296  * and max() functions but if one of the arguments is a QNaN and
3297  * the other is numerical then the numerical argument is returned.
3298  * SNaNs will get quietened before being returned.
3299  * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3300  * and maxNum() operations. min() and max() are the typical min/max
3301  * semantics provided by many CPUs which predate that specification.
3302  *
3303  * minnummag() and maxnummag() functions correspond to minNumMag()
3304  * and minNumMag() from the IEEE-754 2008.
3305  */
3306 static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
3307                                 bool ieee, bool ismag, float_status *s)
3308 {
3309     if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3310         if (ieee) {
3311             /* Takes two floating-point values `a' and `b', one of
3312              * which is a NaN, and returns the appropriate NaN
3313              * result. If either `a' or `b' is a signaling NaN,
3314              * the invalid exception is raised.
3315              */
3316             if (is_snan(a.cls) || is_snan(b.cls)) {
3317                 return *parts_pick_nan(&a, &b, s);
3318             } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3319                 return b;
3320             } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3321                 return a;
3322             }
3323         }
3324         return *parts_pick_nan(&a, &b, s);
3325     } else {
3326         int a_exp, b_exp;
3327
3328         switch (a.cls) {
3329         case float_class_normal:
3330             a_exp = a.exp;
3331             break;
3332         case float_class_inf:
3333             a_exp = INT_MAX;
3334             break;
3335         case float_class_zero:
3336             a_exp = INT_MIN;
3337             break;
3338         default:
3339             g_assert_not_reached();
3340             break;
3341         }
3342         switch (b.cls) {
3343         case float_class_normal:
3344             b_exp = b.exp;
3345             break;
3346         case float_class_inf:
3347             b_exp = INT_MAX;
3348             break;
3349         case float_class_zero:
3350             b_exp = INT_MIN;
3351             break;
3352         default:
3353             g_assert_not_reached();
3354             break;
3355         }
3356
3357         if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3358             bool a_less = a_exp < b_exp;
3359             if (a_exp == b_exp) {
3360                 a_less = a.frac < b.frac;
3361             }
3362             return a_less ^ ismin ? b : a;
3363         }
3364
3365         if (a.sign == b.sign) {
3366             bool a_less = a_exp < b_exp;
3367             if (a_exp == b_exp) {
3368                 a_less = a.frac < b.frac;
3369             }
3370             return a.sign ^ a_less ^ ismin ? b : a;
3371         } else {
3372             return a.sign ^ ismin ? b : a;
3373         }
3374     }
3375 }
3376
3377 #define MINMAX(sz, name, ismin, isiee, ismag)                           \
3378 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
3379                                      float_status *s)                   \
3380 {                                                                       \
3381     FloatParts64 pa, pb, pr;                                            \
3382     float ## sz ## _unpack_canonical(&pa, a, s);                        \
3383     float ## sz ## _unpack_canonical(&pb, b, s);                        \
3384     pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
3385     return float ## sz ## _round_pack_canonical(&pr, s);                \
3386 }
3387
3388 MINMAX(16, min, true, false, false)
3389 MINMAX(16, minnum, true, true, false)
3390 MINMAX(16, minnummag, true, true, true)
3391 MINMAX(16, max, false, false, false)
3392 MINMAX(16, maxnum, false, true, false)
3393 MINMAX(16, maxnummag, false, true, true)
3394
3395 MINMAX(32, min, true, false, false)
3396 MINMAX(32, minnum, true, true, false)
3397 MINMAX(32, minnummag, true, true, true)
3398 MINMAX(32, max, false, false, false)
3399 MINMAX(32, maxnum, false, true, false)
3400 MINMAX(32, maxnummag, false, true, true)
3401
3402 MINMAX(64, min, true, false, false)
3403 MINMAX(64, minnum, true, true, false)
3404 MINMAX(64, minnummag, true, true, true)
3405 MINMAX(64, max, false, false, false)
3406 MINMAX(64, maxnum, false, true, false)
3407 MINMAX(64, maxnummag, false, true, true)
3408
3409 #undef MINMAX
3410
3411 #define BF16_MINMAX(name, ismin, isiee, ismag)                          \
3412 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
3413 {                                                                       \
3414     FloatParts64 pa, pb, pr;                                            \
3415     bfloat16_unpack_canonical(&pa, a, s);                               \
3416     bfloat16_unpack_canonical(&pb, b, s);                               \
3417     pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
3418     return bfloat16_round_pack_canonical(&pr, s);                       \
3419 }
3420
3421 BF16_MINMAX(min, true, false, false)
3422 BF16_MINMAX(minnum, true, true, false)
3423 BF16_MINMAX(minnummag, true, true, true)
3424 BF16_MINMAX(max, false, false, false)
3425 BF16_MINMAX(maxnum, false, true, false)
3426 BF16_MINMAX(maxnummag, false, true, true)
3427
3428 #undef BF16_MINMAX
3429
3430 /* Floating point compare */
3431 static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
3432                                     float_status *s)
3433 {
3434     if (is_nan(a.cls) || is_nan(b.cls)) {
3435         if (!is_quiet ||
3436             a.cls == float_class_snan ||
3437             b.cls == float_class_snan) {
3438             float_raise(float_flag_invalid, s);
3439         }
3440         return float_relation_unordered;
3441     }
3442
3443     if (a.cls == float_class_zero) {
3444         if (b.cls == float_class_zero) {
3445             return float_relation_equal;
3446         }
3447         return b.sign ? float_relation_greater : float_relation_less;
3448     } else if (b.cls == float_class_zero) {
3449         return a.sign ? float_relation_less : float_relation_greater;
3450     }
3451
3452     /* The only really important thing about infinity is its sign. If
3453      * both are infinities the sign marks the smallest of the two.
3454      */
3455     if (a.cls == float_class_inf) {
3456         if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3457             return float_relation_equal;
3458         }
3459         return a.sign ? float_relation_less : float_relation_greater;
3460     } else if (b.cls == float_class_inf) {
3461         return b.sign ? float_relation_greater : float_relation_less;
3462     }
3463
3464     if (a.sign != b.sign) {
3465         return a.sign ? float_relation_less : float_relation_greater;
3466     }
3467
3468     if (a.exp == b.exp) {
3469         if (a.frac == b.frac) {
3470             return float_relation_equal;
3471         }
3472         if (a.sign) {
3473             return a.frac > b.frac ?
3474                 float_relation_less : float_relation_greater;
3475         } else {
3476             return a.frac > b.frac ?
3477                 float_relation_greater : float_relation_less;
3478         }
3479     } else {
3480         if (a.sign) {
3481             return a.exp > b.exp ? float_relation_less : float_relation_greater;
3482         } else {
3483             return a.exp > b.exp ? float_relation_greater : float_relation_less;
3484         }
3485     }
3486 }
3487
3488 #define COMPARE(name, attr, sz)                                         \
3489 static int attr                                                         \
3490 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
3491 {                                                                       \
3492     FloatParts64 pa, pb;                                                \
3493     float ## sz ## _unpack_canonical(&pa, a, s);                        \
3494     float ## sz ## _unpack_canonical(&pb, b, s);                        \
3495     return compare_floats(pa, pb, is_quiet, s);                         \
3496 }
3497
3498 COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3499 COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3500 COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
3501
3502 #undef COMPARE
3503
3504 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3505 {
3506     return soft_f16_compare(a, b, false, s);
3507 }
3508
3509 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3510 {
3511     return soft_f16_compare(a, b, true, s);
3512 }
3513
3514 static FloatRelation QEMU_FLATTEN
3515 f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3516 {
3517     union_float32 ua, ub;
3518
3519     ua.s = xa;
3520     ub.s = xb;
3521
3522     if (QEMU_NO_HARDFLOAT) {
3523         goto soft;
3524     }
3525
3526     float32_input_flush2(&ua.s, &ub.s, s);
3527     if (isgreaterequal(ua.h, ub.h)) {
3528         if (isgreater(ua.h, ub.h)) {
3529             return float_relation_greater;
3530         }
3531         return float_relation_equal;
3532     }
3533     if (likely(isless(ua.h, ub.h))) {
3534         return float_relation_less;
3535     }
3536     /* The only condition remaining is unordered.
3537      * Fall through to set flags.
3538      */
3539  soft:
3540     return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3541 }
3542
3543 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3544 {
3545     return f32_compare(a, b, false, s);
3546 }
3547
3548 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3549 {
3550     return f32_compare(a, b, true, s);
3551 }
3552
3553 static FloatRelation QEMU_FLATTEN
3554 f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3555 {
3556     union_float64 ua, ub;
3557
3558     ua.s = xa;
3559     ub.s = xb;
3560
3561     if (QEMU_NO_HARDFLOAT) {
3562         goto soft;
3563     }
3564
3565     float64_input_flush2(&ua.s, &ub.s, s);
3566     if (isgreaterequal(ua.h, ub.h)) {
3567         if (isgreater(ua.h, ub.h)) {
3568             return float_relation_greater;
3569         }
3570         return float_relation_equal;
3571     }
3572     if (likely(isless(ua.h, ub.h))) {
3573         return float_relation_less;
3574     }
3575     /* The only condition remaining is unordered.
3576      * Fall through to set flags.
3577      */
3578  soft:
3579     return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3580 }
3581
3582 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3583 {
3584     return f64_compare(a, b, false, s);
3585 }
3586
3587 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3588 {
3589     return f64_compare(a, b, true, s);
3590 }
3591
3592 static FloatRelation QEMU_FLATTEN
3593 soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3594 {
3595     FloatParts64 pa, pb;
3596
3597     bfloat16_unpack_canonical(&pa, a, s);
3598     bfloat16_unpack_canonical(&pb, b, s);
3599     return compare_floats(pa, pb, is_quiet, s);
3600 }
3601
3602 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3603 {
3604     return soft_bf16_compare(a, b, false, s);
3605 }
3606
3607 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3608 {
3609     return soft_bf16_compare(a, b, true, s);
3610 }
3611
3612 /* Multiply A by 2 raised to the power N.  */
3613 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3614 {
3615     if (unlikely(is_nan(a.cls))) {
3616         parts_return_nan(&a, s);
3617     }
3618     if (a.cls == float_class_normal) {
3619         /* The largest float type (even though not supported by FloatParts64)
3620          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3621          * still allows rounding to infinity, without allowing overflow
3622          * within the int32_t that backs FloatParts64.exp.
3623          */
3624         n = MIN(MAX(n, -0x10000), 0x10000);
3625         a.exp += n;
3626     }
3627     return a;
3628 }
3629
3630 float16 float16_scalbn(float16 a, int n, float_status *status)
3631 {
3632     FloatParts64 pa, pr;
3633
3634     float16_unpack_canonical(&pa, a, status);
3635     pr = scalbn_decomposed(pa, n, status);
3636     return float16_round_pack_canonical(&pr, status);
3637 }
3638
3639 float32 float32_scalbn(float32 a, int n, float_status *status)
3640 {
3641     FloatParts64 pa, pr;
3642
3643     float32_unpack_canonical(&pa, a, status);
3644     pr = scalbn_decomposed(pa, n, status);
3645     return float32_round_pack_canonical(&pr, status);
3646 }
3647
3648 float64 float64_scalbn(float64 a, int n, float_status *status)
3649 {
3650     FloatParts64 pa, pr;
3651
3652     float64_unpack_canonical(&pa, a, status);
3653     pr = scalbn_decomposed(pa, n, status);
3654     return float64_round_pack_canonical(&pr, status);
3655 }
3656
3657 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3658 {
3659     FloatParts64 pa, pr;
3660
3661     bfloat16_unpack_canonical(&pa, a, status);
3662     pr = scalbn_decomposed(pa, n, status);
3663     return bfloat16_round_pack_canonical(&pr, status);
3664 }
3665
3666 /*
3667  * Square Root
3668  *
3669  * The old softfloat code did an approximation step before zeroing in
3670  * on the final result. However for simpleness we just compute the
3671  * square root by iterating down from the implicit bit to enough extra
3672  * bits to ensure we get a correctly rounded result.
3673  *
3674  * This does mean however the calculation is slower than before,
3675  * especially for 64 bit floats.
3676  */
3677
3678 static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
3679 {
3680     uint64_t a_frac, r_frac, s_frac;
3681     int bit, last_bit;
3682
3683     if (is_nan(a.cls)) {
3684         parts_return_nan(&a, s);
3685         return a;
3686     }
3687     if (a.cls == float_class_zero) {
3688         return a;  /* sqrt(+-0) = +-0 */
3689     }
3690     if (a.sign) {
3691         float_raise(float_flag_invalid, s);
3692         parts_default_nan(&a, s);
3693         return a;
3694     }
3695     if (a.cls == float_class_inf) {
3696         return a;  /* sqrt(+inf) = +inf */
3697     }
3698
3699     assert(a.cls == float_class_normal);
3700
3701     /* We need two overflow bits at the top. Adding room for that is a
3702      * right shift. If the exponent is odd, we can discard the low bit
3703      * by multiplying the fraction by 2; that's a left shift. Combine
3704      * those and we shift right by 1 if the exponent is odd, otherwise 2.
3705      */
3706     a_frac = a.frac >> (2 - (a.exp & 1));
3707     a.exp >>= 1;
3708
3709     /* Bit-by-bit computation of sqrt.  */
3710     r_frac = 0;
3711     s_frac = 0;
3712
3713     /* Iterate from implicit bit down to the 3 extra bits to compute a
3714      * properly rounded result. Remember we've inserted two more bits
3715      * at the top, so these positions are two less.
3716      */
3717     bit = DECOMPOSED_BINARY_POINT - 2;
3718     last_bit = MAX(p->frac_shift - 4, 0);
3719     do {
3720         uint64_t q = 1ULL << bit;
3721         uint64_t t_frac = s_frac + q;
3722         if (t_frac <= a_frac) {
3723             s_frac = t_frac + q;
3724             a_frac -= t_frac;
3725             r_frac += q;
3726         }
3727         a_frac <<= 1;
3728     } while (--bit >= last_bit);
3729
3730     /* Undo the right shift done above. If there is any remaining
3731      * fraction, the result is inexact. Set the sticky bit.
3732      */
3733     a.frac = (r_frac << 2) + (a_frac != 0);
3734
3735     return a;
3736 }
3737
3738 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3739 {
3740     FloatParts64 pa, pr;
3741
3742     float16_unpack_canonical(&pa, a, status);
3743     pr = sqrt_float(pa, status, &float16_params);
3744     return float16_round_pack_canonical(&pr, status);
3745 }
3746
3747 static float32 QEMU_SOFTFLOAT_ATTR
3748 soft_f32_sqrt(float32 a, float_status *status)
3749 {
3750     FloatParts64 pa, pr;
3751
3752     float32_unpack_canonical(&pa, a, status);
3753     pr = sqrt_float(pa, status, &float32_params);
3754     return float32_round_pack_canonical(&pr, status);
3755 }
3756
3757 static float64 QEMU_SOFTFLOAT_ATTR
3758 soft_f64_sqrt(float64 a, float_status *status)
3759 {
3760     FloatParts64 pa, pr;
3761
3762     float64_unpack_canonical(&pa, a, status);
3763     pr = sqrt_float(pa, status, &float64_params);
3764     return float64_round_pack_canonical(&pr, status);
3765 }
3766
3767 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3768 {
3769     union_float32 ua, ur;
3770
3771     ua.s = xa;
3772     if (unlikely(!can_use_fpu(s))) {
3773         goto soft;
3774     }
3775
3776     float32_input_flush1(&ua.s, s);
3777     if (QEMU_HARDFLOAT_1F32_USE_FP) {
3778         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3779                        fpclassify(ua.h) == FP_ZERO) ||
3780                      signbit(ua.h))) {
3781             goto soft;
3782         }
3783     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3784                         float32_is_neg(ua.s))) {
3785         goto soft;
3786     }
3787     ur.h = sqrtf(ua.h);
3788     return ur.s;
3789
3790  soft:
3791     return soft_f32_sqrt(ua.s, s);
3792 }
3793
3794 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3795 {
3796     union_float64 ua, ur;
3797
3798     ua.s = xa;
3799     if (unlikely(!can_use_fpu(s))) {
3800         goto soft;
3801     }
3802
3803     float64_input_flush1(&ua.s, s);
3804     if (QEMU_HARDFLOAT_1F64_USE_FP) {
3805         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3806                        fpclassify(ua.h) == FP_ZERO) ||
3807                      signbit(ua.h))) {
3808             goto soft;
3809         }
3810     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3811                         float64_is_neg(ua.s))) {
3812         goto soft;
3813     }
3814     ur.h = sqrt(ua.h);
3815     return ur.s;
3816
3817  soft:
3818     return soft_f64_sqrt(ua.s, s);
3819 }
3820
3821 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3822 {
3823     FloatParts64 pa, pr;
3824
3825     bfloat16_unpack_canonical(&pa, a, status);
3826     pr = sqrt_float(pa, status, &bfloat16_params);
3827     return bfloat16_round_pack_canonical(&pr, status);
3828 }
3829
3830 /*----------------------------------------------------------------------------
3831 | The pattern for a default generated NaN.
3832 *----------------------------------------------------------------------------*/
3833
3834 float16 float16_default_nan(float_status *status)
3835 {
3836     FloatParts64 p;
3837
3838     parts_default_nan(&p, status);
3839     p.frac >>= float16_params.frac_shift;
3840     return float16_pack_raw(&p);
3841 }
3842
3843 float32 float32_default_nan(float_status *status)
3844 {
3845     FloatParts64 p;
3846
3847     parts_default_nan(&p, status);
3848     p.frac >>= float32_params.frac_shift;
3849     return float32_pack_raw(&p);
3850 }
3851
3852 float64 float64_default_nan(float_status *status)
3853 {
3854     FloatParts64 p;
3855
3856     parts_default_nan(&p, status);
3857     p.frac >>= float64_params.frac_shift;
3858     return float64_pack_raw(&p);
3859 }
3860
3861 float128 float128_default_nan(float_status *status)
3862 {
3863     FloatParts128 p;
3864
3865     parts_default_nan(&p, status);
3866     frac_shr(&p, float128_params.frac_shift);
3867     return float128_pack_raw(&p);
3868 }
3869
3870 bfloat16 bfloat16_default_nan(float_status *status)
3871 {
3872     FloatParts64 p;
3873
3874     parts_default_nan(&p, status);
3875     p.frac >>= bfloat16_params.frac_shift;
3876     return bfloat16_pack_raw(&p);
3877 }
3878
3879 /*----------------------------------------------------------------------------
3880 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3881 *----------------------------------------------------------------------------*/
3882
3883 float16 float16_silence_nan(float16 a, float_status *status)
3884 {
3885     FloatParts64 p;
3886
3887     float16_unpack_raw(&p, a);
3888     p.frac <<= float16_params.frac_shift;
3889     parts_silence_nan(&p, status);
3890     p.frac >>= float16_params.frac_shift;
3891     return float16_pack_raw(&p);
3892 }
3893
3894 float32 float32_silence_nan(float32 a, float_status *status)
3895 {
3896     FloatParts64 p;
3897
3898     float32_unpack_raw(&p, a);
3899     p.frac <<= float32_params.frac_shift;
3900     parts_silence_nan(&p, status);
3901     p.frac >>= float32_params.frac_shift;
3902     return float32_pack_raw(&p);
3903 }
3904
3905 float64 float64_silence_nan(float64 a, float_status *status)
3906 {
3907     FloatParts64 p;
3908
3909     float64_unpack_raw(&p, a);
3910     p.frac <<= float64_params.frac_shift;
3911     parts_silence_nan(&p, status);
3912     p.frac >>= float64_params.frac_shift;
3913     return float64_pack_raw(&p);
3914 }
3915
3916 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3917 {
3918     FloatParts64 p;
3919
3920     bfloat16_unpack_raw(&p, a);
3921     p.frac <<= bfloat16_params.frac_shift;
3922     parts_silence_nan(&p, status);
3923     p.frac >>= bfloat16_params.frac_shift;
3924     return bfloat16_pack_raw(&p);
3925 }
3926
3927 float128 float128_silence_nan(float128 a, float_status *status)
3928 {
3929     FloatParts128 p;
3930
3931     float128_unpack_raw(&p, a);
3932     frac_shl(&p, float128_params.frac_shift);
3933     parts_silence_nan(&p, status);
3934     frac_shr(&p, float128_params.frac_shift);
3935     return float128_pack_raw(&p);
3936 }
3937
3938 /*----------------------------------------------------------------------------
3939 | If `a' is denormal and we are in flush-to-zero mode then set the
3940 | input-denormal exception and return zero. Otherwise just return the value.
3941 *----------------------------------------------------------------------------*/
3942
3943 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3944 {
3945     if (p.exp == 0 && p.frac != 0) {
3946         float_raise(float_flag_input_denormal, status);
3947         return true;
3948     }
3949
3950     return false;
3951 }
3952
3953 float16 float16_squash_input_denormal(float16 a, float_status *status)
3954 {
3955     if (status->flush_inputs_to_zero) {
3956         FloatParts64 p;
3957
3958         float16_unpack_raw(&p, a);
3959         if (parts_squash_denormal(p, status)) {
3960             return float16_set_sign(float16_zero, p.sign);
3961         }
3962     }
3963     return a;
3964 }
3965
3966 float32 float32_squash_input_denormal(float32 a, float_status *status)
3967 {
3968     if (status->flush_inputs_to_zero) {
3969         FloatParts64 p;
3970
3971         float32_unpack_raw(&p, a);
3972         if (parts_squash_denormal(p, status)) {
3973             return float32_set_sign(float32_zero, p.sign);
3974         }
3975     }
3976     return a;
3977 }
3978
3979 float64 float64_squash_input_denormal(float64 a, float_status *status)
3980 {
3981     if (status->flush_inputs_to_zero) {
3982         FloatParts64 p;
3983
3984         float64_unpack_raw(&p, a);
3985         if (parts_squash_denormal(p, status)) {
3986             return float64_set_sign(float64_zero, p.sign);
3987         }
3988     }
3989     return a;
3990 }
3991
3992 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3993 {
3994     if (status->flush_inputs_to_zero) {
3995         FloatParts64 p;
3996
3997         bfloat16_unpack_raw(&p, a);
3998         if (parts_squash_denormal(p, status)) {
3999             return bfloat16_set_sign(bfloat16_zero, p.sign);
4000         }
4001     }
4002     return a;
4003 }
4004
4005 /*----------------------------------------------------------------------------
4006 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4007 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4008 | input.  If `zSign' is 1, the input is negated before being converted to an
4009 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
4010 | is simply rounded to an integer, with the inexact exception raised if the
4011 | input cannot be represented exactly as an integer.  However, if the fixed-
4012 | point input is too large, the invalid exception is raised and the largest
4013 | positive or negative integer is returned.
4014 *----------------------------------------------------------------------------*/
4015
4016 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4017                                  float_status *status)
4018 {
4019     int8_t roundingMode;
4020     bool roundNearestEven;
4021     int8_t roundIncrement, roundBits;
4022     int32_t z;
4023
4024     roundingMode = status->float_rounding_mode;
4025     roundNearestEven = ( roundingMode == float_round_nearest_even );
4026     switch (roundingMode) {
4027     case float_round_nearest_even:
4028     case float_round_ties_away:
4029         roundIncrement = 0x40;
4030         break;
4031     case float_round_to_zero:
4032         roundIncrement = 0;
4033         break;
4034     case float_round_up:
4035         roundIncrement = zSign ? 0 : 0x7f;
4036         break;
4037     case float_round_down:
4038         roundIncrement = zSign ? 0x7f : 0;
4039         break;
4040     case float_round_to_odd:
4041         roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4042         break;
4043     default:
4044         abort();
4045     }
4046     roundBits = absZ & 0x7F;
4047     absZ = ( absZ + roundIncrement )>>7;
4048     if (!(roundBits ^ 0x40) && roundNearestEven) {
4049         absZ &= ~1;
4050     }
4051     z = absZ;
4052     if ( zSign ) z = - z;
4053     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
4054         float_raise(float_flag_invalid, status);
4055         return zSign ? INT32_MIN : INT32_MAX;
4056     }
4057     if (roundBits) {
4058         float_raise(float_flag_inexact, status);
4059     }
4060     return z;
4061
4062 }
4063
4064 /*----------------------------------------------------------------------------
4065 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4066 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4067 | and returns the properly rounded 64-bit integer corresponding to the input.
4068 | If `zSign' is 1, the input is negated before being converted to an integer.
4069 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4070 | the inexact exception raised if the input cannot be represented exactly as
4071 | an integer.  However, if the fixed-point input is too large, the invalid
4072 | exception is raised and the largest positive or negative integer is
4073 | returned.
4074 *----------------------------------------------------------------------------*/
4075
4076 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
4077                                float_status *status)
4078 {
4079     int8_t roundingMode;
4080     bool roundNearestEven, increment;
4081     int64_t z;
4082
4083     roundingMode = status->float_rounding_mode;
4084     roundNearestEven = ( roundingMode == float_round_nearest_even );
4085     switch (roundingMode) {
4086     case float_round_nearest_even:
4087     case float_round_ties_away:
4088         increment = ((int64_t) absZ1 < 0);
4089         break;
4090     case float_round_to_zero:
4091         increment = 0;
4092         break;
4093     case float_round_up:
4094         increment = !zSign && absZ1;
4095         break;
4096     case float_round_down:
4097         increment = zSign && absZ1;
4098         break;
4099     case float_round_to_odd:
4100         increment = !(absZ0 & 1) && absZ1;
4101         break;
4102     default:
4103         abort();
4104     }
4105     if ( increment ) {
4106         ++absZ0;
4107         if ( absZ0 == 0 ) goto overflow;
4108         if (!(absZ1 << 1) && roundNearestEven) {
4109             absZ0 &= ~1;
4110         }
4111     }
4112     z = absZ0;
4113     if ( zSign ) z = - z;
4114     if ( z && ( ( z < 0 ) ^ zSign ) ) {
4115  overflow:
4116         float_raise(float_flag_invalid, status);
4117         return zSign ? INT64_MIN : INT64_MAX;
4118     }
4119     if (absZ1) {
4120         float_raise(float_flag_inexact, status);
4121     }
4122     return z;
4123
4124 }
4125
4126 /*----------------------------------------------------------------------------
4127 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4128 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4129 | and returns the properly rounded 64-bit unsigned integer corresponding to the
4130 | input.  Ordinarily, the fixed-point input is simply rounded to an integer,
4131 | with the inexact exception raised if the input cannot be represented exactly
4132 | as an integer.  However, if the fixed-point input is too large, the invalid
4133 | exception is raised and the largest unsigned integer is returned.
4134 *----------------------------------------------------------------------------*/
4135
4136 static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
4137                                 uint64_t absZ1, float_status *status)
4138 {
4139     int8_t roundingMode;
4140     bool roundNearestEven, increment;
4141
4142     roundingMode = status->float_rounding_mode;
4143     roundNearestEven = (roundingMode == float_round_nearest_even);
4144     switch (roundingMode) {
4145     case float_round_nearest_even:
4146     case float_round_ties_away:
4147         increment = ((int64_t)absZ1 < 0);
4148         break;
4149     case float_round_to_zero:
4150         increment = 0;
4151         break;
4152     case float_round_up:
4153         increment = !zSign && absZ1;
4154         break;
4155     case float_round_down:
4156         increment = zSign && absZ1;
4157         break;
4158     case float_round_to_odd:
4159         increment = !(absZ0 & 1) && absZ1;
4160         break;
4161     default:
4162         abort();
4163     }
4164     if (increment) {
4165         ++absZ0;
4166         if (absZ0 == 0) {
4167             float_raise(float_flag_invalid, status);
4168             return UINT64_MAX;
4169         }
4170         if (!(absZ1 << 1) && roundNearestEven) {
4171             absZ0 &= ~1;
4172         }
4173     }
4174
4175     if (zSign && absZ0) {
4176         float_raise(float_flag_invalid, status);
4177         return 0;
4178     }
4179
4180     if (absZ1) {
4181         float_raise(float_flag_inexact, status);
4182     }
4183     return absZ0;
4184 }
4185
4186 /*----------------------------------------------------------------------------
4187 | Normalizes the subnormal single-precision floating-point value represented
4188 | by the denormalized significand `aSig'.  The normalized exponent and
4189 | significand are stored at the locations pointed to by `zExpPtr' and
4190 | `zSigPtr', respectively.
4191 *----------------------------------------------------------------------------*/
4192
4193 static void
4194  normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4195 {
4196     int8_t shiftCount;
4197
4198     shiftCount = clz32(aSig) - 8;
4199     *zSigPtr = aSig<<shiftCount;
4200     *zExpPtr = 1 - shiftCount;
4201
4202 }
4203
4204 /*----------------------------------------------------------------------------
4205 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4206 | and significand `zSig', and returns the proper single-precision floating-
4207 | point value corresponding to the abstract input.  Ordinarily, the abstract
4208 | value is simply rounded and packed into the single-precision format, with
4209 | the inexact exception raised if the abstract input cannot be represented
4210 | exactly.  However, if the abstract value is too large, the overflow and
4211 | inexact exceptions are raised and an infinity or maximal finite value is
4212 | returned.  If the abstract value is too small, the input value is rounded to
4213 | a subnormal number, and the underflow and inexact exceptions are raised if
4214 | the abstract input cannot be represented exactly as a subnormal single-
4215 | precision floating-point number.
4216 |     The input significand `zSig' has its binary point between bits 30
4217 | and 29, which is 7 bits to the left of the usual location.  This shifted
4218 | significand must be normalized or smaller.  If `zSig' is not normalized,
4219 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4220 | and it must not require rounding.  In the usual case that `zSig' is
4221 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4222 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4223 | Binary Floating-Point Arithmetic.
4224 *----------------------------------------------------------------------------*/
4225
4226 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4227                                    float_status *status)
4228 {
4229     int8_t roundingMode;
4230     bool roundNearestEven;
4231     int8_t roundIncrement, roundBits;
4232     bool isTiny;
4233
4234     roundingMode = status->float_rounding_mode;
4235     roundNearestEven = ( roundingMode == float_round_nearest_even );
4236     switch (roundingMode) {
4237     case float_round_nearest_even:
4238     case float_round_ties_away:
4239         roundIncrement = 0x40;
4240         break;
4241     case float_round_to_zero:
4242         roundIncrement = 0;
4243         break;
4244     case float_round_up:
4245         roundIncrement = zSign ? 0 : 0x7f;
4246         break;
4247     case float_round_down:
4248         roundIncrement = zSign ? 0x7f : 0;
4249         break;
4250     case float_round_to_odd:
4251         roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4252         break;
4253     default:
4254         abort();
4255         break;
4256     }
4257     roundBits = zSig & 0x7F;
4258     if ( 0xFD <= (uint16_t) zExp ) {
4259         if (    ( 0xFD < zExp )
4260              || (    ( zExp == 0xFD )
4261                   && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4262            ) {
4263             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4264                                    roundIncrement != 0;
4265             float_raise(float_flag_overflow | float_flag_inexact, status);
4266             return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4267         }
4268         if ( zExp < 0 ) {
4269             if (status->flush_to_zero) {
4270                 float_raise(float_flag_output_denormal, status);
4271                 return packFloat32(zSign, 0, 0);
4272             }
4273             isTiny = status->tininess_before_rounding
4274                   || (zExp < -1)
4275                   || (zSig + roundIncrement < 0x80000000);
4276             shift32RightJamming( zSig, - zExp, &zSig );
4277             zExp = 0;
4278             roundBits = zSig & 0x7F;
4279             if (isTiny && roundBits) {
4280                 float_raise(float_flag_underflow, status);
4281             }
4282             if (roundingMode == float_round_to_odd) {
4283                 /*
4284                  * For round-to-odd case, the roundIncrement depends on
4285                  * zSig which just changed.
4286                  */
4287                 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4288             }
4289         }
4290     }
4291     if (roundBits) {
4292         float_raise(float_flag_inexact, status);
4293     }
4294     zSig = ( zSig + roundIncrement )>>7;
4295     if (!(roundBits ^ 0x40) && roundNearestEven) {
4296         zSig &= ~1;
4297     }
4298     if ( zSig == 0 ) zExp = 0;
4299     return packFloat32( zSign, zExp, zSig );
4300
4301 }
4302
4303 /*----------------------------------------------------------------------------
4304 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4305 | and significand `zSig', and returns the proper single-precision floating-
4306 | point value corresponding to the abstract input.  This routine is just like
4307 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4308 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4309 | floating-point exponent.
4310 *----------------------------------------------------------------------------*/
4311
4312 static float32
4313  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4314                               float_status *status)
4315 {
4316     int8_t shiftCount;
4317
4318     shiftCount = clz32(zSig) - 1;
4319     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4320                                status);
4321
4322 }
4323
4324 /*----------------------------------------------------------------------------
4325 | Normalizes the subnormal double-precision floating-point value represented
4326 | by the denormalized significand `aSig'.  The normalized exponent and
4327 | significand are stored at the locations pointed to by `zExpPtr' and
4328 | `zSigPtr', respectively.
4329 *----------------------------------------------------------------------------*/
4330
4331 static void
4332  normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4333 {
4334     int8_t shiftCount;
4335
4336     shiftCount = clz64(aSig) - 11;
4337     *zSigPtr = aSig<<shiftCount;
4338     *zExpPtr = 1 - shiftCount;
4339
4340 }
4341
4342 /*----------------------------------------------------------------------------
4343 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4344 | double-precision floating-point value, returning the result.  After being
4345 | shifted into the proper positions, the three fields are simply added
4346 | together to form the result.  This means that any integer portion of `zSig'
4347 | will be added into the exponent.  Since a properly normalized significand
4348 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4349 | than the desired result exponent whenever `zSig' is a complete, normalized
4350 | significand.
4351 *----------------------------------------------------------------------------*/
4352
4353 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4354 {
4355
4356     return make_float64(
4357         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4358
4359 }
4360
4361 /*----------------------------------------------------------------------------
4362 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4363 | and significand `zSig', and returns the proper double-precision floating-
4364 | point value corresponding to the abstract input.  Ordinarily, the abstract
4365 | value is simply rounded and packed into the double-precision format, with
4366 | the inexact exception raised if the abstract input cannot be represented
4367 | exactly.  However, if the abstract value is too large, the overflow and
4368 | inexact exceptions are raised and an infinity or maximal finite value is
4369 | returned.  If the abstract value is too small, the input value is rounded to
4370 | a subnormal number, and the underflow and inexact exceptions are raised if
4371 | the abstract input cannot be represented exactly as a subnormal double-
4372 | precision floating-point number.
4373 |     The input significand `zSig' has its binary point between bits 62
4374 | and 61, which is 10 bits to the left of the usual location.  This shifted
4375 | significand must be normalized or smaller.  If `zSig' is not normalized,
4376 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4377 | and it must not require rounding.  In the usual case that `zSig' is
4378 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4379 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4380 | Binary Floating-Point Arithmetic.
4381 *----------------------------------------------------------------------------*/
4382
4383 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4384                                    float_status *status)
4385 {
4386     int8_t roundingMode;
4387     bool roundNearestEven;
4388     int roundIncrement, roundBits;
4389     bool isTiny;
4390
4391     roundingMode = status->float_rounding_mode;
4392     roundNearestEven = ( roundingMode == float_round_nearest_even );
4393     switch (roundingMode) {
4394     case float_round_nearest_even:
4395     case float_round_ties_away:
4396         roundIncrement = 0x200;
4397         break;
4398     case float_round_to_zero:
4399         roundIncrement = 0;
4400         break;
4401     case float_round_up:
4402         roundIncrement = zSign ? 0 : 0x3ff;
4403         break;
4404     case float_round_down:
4405         roundIncrement = zSign ? 0x3ff : 0;
4406         break;
4407     case float_round_to_odd:
4408         roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4409         break;
4410     default:
4411         abort();
4412     }
4413     roundBits = zSig & 0x3FF;
4414     if ( 0x7FD <= (uint16_t) zExp ) {
4415         if (    ( 0x7FD < zExp )
4416              || (    ( zExp == 0x7FD )
4417                   && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4418            ) {
4419             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4420                                    roundIncrement != 0;
4421             float_raise(float_flag_overflow | float_flag_inexact, status);
4422             return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4423         }
4424         if ( zExp < 0 ) {
4425             if (status->flush_to_zero) {
4426                 float_raise(float_flag_output_denormal, status);
4427                 return packFloat64(zSign, 0, 0);
4428             }
4429             isTiny = status->tininess_before_rounding
4430                   || (zExp < -1)
4431                   || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4432             shift64RightJamming( zSig, - zExp, &zSig );
4433             zExp = 0;
4434             roundBits = zSig & 0x3FF;
4435             if (isTiny && roundBits) {
4436                 float_raise(float_flag_underflow, status);
4437             }
4438             if (roundingMode == float_round_to_odd) {
4439                 /*
4440                  * For round-to-odd case, the roundIncrement depends on
4441                  * zSig which just changed.
4442                  */
4443                 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4444             }
4445         }
4446     }
4447     if (roundBits) {
4448         float_raise(float_flag_inexact, status);
4449     }
4450     zSig = ( zSig + roundIncrement )>>10;
4451     if (!(roundBits ^ 0x200) && roundNearestEven) {
4452         zSig &= ~1;
4453     }
4454     if ( zSig == 0 ) zExp = 0;
4455     return packFloat64( zSign, zExp, zSig );
4456
4457 }
4458
4459 /*----------------------------------------------------------------------------
4460 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4461 | and significand `zSig', and returns the proper double-precision floating-
4462 | point value corresponding to the abstract input.  This routine is just like
4463 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4464 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4465 | floating-point exponent.
4466 *----------------------------------------------------------------------------*/
4467
4468 static float64
4469  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4470                               float_status *status)
4471 {
4472     int8_t shiftCount;
4473
4474     shiftCount = clz64(zSig) - 1;
4475     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4476                                status);
4477
4478 }
4479
4480 /*----------------------------------------------------------------------------
4481 | Normalizes the subnormal extended double-precision floating-point value
4482 | represented by the denormalized significand `aSig'.  The normalized exponent
4483 | and significand are stored at the locations pointed to by `zExpPtr' and
4484 | `zSigPtr', respectively.
4485 *----------------------------------------------------------------------------*/
4486
4487 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4488                                 uint64_t *zSigPtr)
4489 {
4490     int8_t shiftCount;
4491
4492     shiftCount = clz64(aSig);
4493     *zSigPtr = aSig<<shiftCount;
4494     *zExpPtr = 1 - shiftCount;
4495 }
4496
4497 /*----------------------------------------------------------------------------
4498 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4499 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4500 | and returns the proper extended double-precision floating-point value
4501 | corresponding to the abstract input.  Ordinarily, the abstract value is
4502 | rounded and packed into the extended double-precision format, with the
4503 | inexact exception raised if the abstract input cannot be represented
4504 | exactly.  However, if the abstract value is too large, the overflow and
4505 | inexact exceptions are raised and an infinity or maximal finite value is
4506 | returned.  If the abstract value is too small, the input value is rounded to
4507 | a subnormal number, and the underflow and inexact exceptions are raised if
4508 | the abstract input cannot be represented exactly as a subnormal extended
4509 | double-precision floating-point number.
4510 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4511 | number of bits as single or double precision, respectively.  Otherwise, the
4512 | result is rounded to the full precision of the extended double-precision
4513 | format.
4514 |     The input significand must be normalized or smaller.  If the input
4515 | significand is not normalized, `zExp' must be 0; in that case, the result
4516 | returned is a subnormal number, and it must not require rounding.  The
4517 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4518 | Floating-Point Arithmetic.
4519 *----------------------------------------------------------------------------*/
4520
4521 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4522                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4523                               float_status *status)
4524 {
4525     int8_t roundingMode;
4526     bool roundNearestEven, increment, isTiny;
4527     int64_t roundIncrement, roundMask, roundBits;
4528
4529     roundingMode = status->float_rounding_mode;
4530     roundNearestEven = ( roundingMode == float_round_nearest_even );
4531     if ( roundingPrecision == 80 ) goto precision80;
4532     if ( roundingPrecision == 64 ) {
4533         roundIncrement = UINT64_C(0x0000000000000400);
4534         roundMask = UINT64_C(0x00000000000007FF);
4535     }
4536     else if ( roundingPrecision == 32 ) {
4537         roundIncrement = UINT64_C(0x0000008000000000);
4538         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4539     }
4540     else {
4541         goto precision80;
4542     }
4543     zSig0 |= ( zSig1 != 0 );
4544     switch (roundingMode) {
4545     case float_round_nearest_even:
4546     case float_round_ties_away:
4547         break;
4548     case float_round_to_zero:
4549         roundIncrement = 0;
4550         break;
4551     case float_round_up:
4552         roundIncrement = zSign ? 0 : roundMask;
4553         break;
4554     case float_round_down:
4555         roundIncrement = zSign ? roundMask : 0;
4556         break;
4557     default:
4558         abort();
4559     }
4560     roundBits = zSig0 & roundMask;
4561     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4562         if (    ( 0x7FFE < zExp )
4563              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4564            ) {
4565             goto overflow;
4566         }
4567         if ( zExp <= 0 ) {
4568             if (status->flush_to_zero) {
4569                 float_raise(float_flag_output_denormal, status);
4570                 return packFloatx80(zSign, 0, 0);
4571             }
4572             isTiny = status->tininess_before_rounding
4573                   || (zExp < 0 )
4574                   || (zSig0 <= zSig0 + roundIncrement);
4575             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4576             zExp = 0;
4577             roundBits = zSig0 & roundMask;
4578             if (isTiny && roundBits) {
4579                 float_raise(float_flag_underflow, status);
4580             }
4581             if (roundBits) {
4582                 float_raise(float_flag_inexact, status);
4583             }
4584             zSig0 += roundIncrement;
4585             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4586             roundIncrement = roundMask + 1;
4587             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4588                 roundMask |= roundIncrement;
4589             }
4590             zSig0 &= ~ roundMask;
4591             return packFloatx80( zSign, zExp, zSig0 );
4592         }
4593     }
4594     if (roundBits) {
4595         float_raise(float_flag_inexact, status);
4596     }
4597     zSig0 += roundIncrement;
4598     if ( zSig0 < roundIncrement ) {
4599         ++zExp;
4600         zSig0 = UINT64_C(0x8000000000000000);
4601     }
4602     roundIncrement = roundMask + 1;
4603     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4604         roundMask |= roundIncrement;
4605     }
4606     zSig0 &= ~ roundMask;
4607     if ( zSig0 == 0 ) zExp = 0;
4608     return packFloatx80( zSign, zExp, zSig0 );
4609  precision80:
4610     switch (roundingMode) {
4611     case float_round_nearest_even:
4612     case float_round_ties_away:
4613         increment = ((int64_t)zSig1 < 0);
4614         break;
4615     case float_round_to_zero:
4616         increment = 0;
4617         break;
4618     case float_round_up:
4619         increment = !zSign && zSig1;
4620         break;
4621     case float_round_down:
4622         increment = zSign && zSig1;
4623         break;
4624     default:
4625         abort();
4626     }
4627     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4628         if (    ( 0x7FFE < zExp )
4629              || (    ( zExp == 0x7FFE )
4630                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4631                   && increment
4632                 )
4633            ) {
4634             roundMask = 0;
4635  overflow:
4636             float_raise(float_flag_overflow | float_flag_inexact, status);
4637             if (    ( roundingMode == float_round_to_zero )
4638                  || ( zSign && ( roundingMode == float_round_up ) )
4639                  || ( ! zSign && ( roundingMode == float_round_down ) )
4640                ) {
4641                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4642             }
4643             return packFloatx80(zSign,
4644                                 floatx80_infinity_high,
4645                                 floatx80_infinity_low);
4646         }
4647         if ( zExp <= 0 ) {
4648             isTiny = status->tininess_before_rounding
4649                   || (zExp < 0)
4650                   || !increment
4651                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4652             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4653             zExp = 0;
4654             if (isTiny && zSig1) {
4655                 float_raise(float_flag_underflow, status);
4656             }
4657             if (zSig1) {
4658                 float_raise(float_flag_inexact, status);
4659             }
4660             switch (roundingMode) {
4661             case float_round_nearest_even:
4662             case float_round_ties_away:
4663                 increment = ((int64_t)zSig1 < 0);
4664                 break;
4665             case float_round_to_zero:
4666                 increment = 0;
4667                 break;
4668             case float_round_up:
4669                 increment = !zSign && zSig1;
4670                 break;
4671             case float_round_down:
4672                 increment = zSign && zSig1;
4673                 break;
4674             default:
4675                 abort();
4676             }
4677             if ( increment ) {
4678                 ++zSig0;
4679                 if (!(zSig1 << 1) && roundNearestEven) {
4680                     zSig0 &= ~1;
4681                 }
4682                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4683             }
4684             return packFloatx80( zSign, zExp, zSig0 );
4685         }
4686     }
4687     if (zSig1) {
4688         float_raise(float_flag_inexact, status);
4689     }
4690     if ( increment ) {
4691         ++zSig0;
4692         if ( zSig0 == 0 ) {
4693             ++zExp;
4694             zSig0 = UINT64_C(0x8000000000000000);
4695         }
4696         else {
4697             if (!(zSig1 << 1) && roundNearestEven) {
4698                 zSig0 &= ~1;
4699             }
4700         }
4701     }
4702     else {
4703         if ( zSig0 == 0 ) zExp = 0;
4704     }
4705     return packFloatx80( zSign, zExp, zSig0 );
4706
4707 }
4708
4709 /*----------------------------------------------------------------------------
4710 | Takes an abstract floating-point value having sign `zSign', exponent
4711 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4712 | and returns the proper extended double-precision floating-point value
4713 | corresponding to the abstract input.  This routine is just like
4714 | `roundAndPackFloatx80' except that the input significand does not have to be
4715 | normalized.
4716 *----------------------------------------------------------------------------*/
4717
4718 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4719                                        bool zSign, int32_t zExp,
4720                                        uint64_t zSig0, uint64_t zSig1,
4721                                        float_status *status)
4722 {
4723     int8_t shiftCount;
4724
4725     if ( zSig0 == 0 ) {
4726         zSig0 = zSig1;
4727         zSig1 = 0;
4728         zExp -= 64;
4729     }
4730     shiftCount = clz64(zSig0);
4731     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4732     zExp -= shiftCount;
4733     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4734                                 zSig0, zSig1, status);
4735
4736 }
4737
4738 /*----------------------------------------------------------------------------
4739 | Returns the least-significant 64 fraction bits of the quadruple-precision
4740 | floating-point value `a'.
4741 *----------------------------------------------------------------------------*/
4742
4743 static inline uint64_t extractFloat128Frac1( float128 a )
4744 {
4745
4746     return a.low;
4747
4748 }
4749
4750 /*----------------------------------------------------------------------------
4751 | Returns the most-significant 48 fraction bits of the quadruple-precision
4752 | floating-point value `a'.
4753 *----------------------------------------------------------------------------*/
4754
4755 static inline uint64_t extractFloat128Frac0( float128 a )
4756 {
4757
4758     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4759
4760 }
4761
4762 /*----------------------------------------------------------------------------
4763 | Returns the exponent bits of the quadruple-precision floating-point value
4764 | `a'.
4765 *----------------------------------------------------------------------------*/
4766
4767 static inline int32_t extractFloat128Exp( float128 a )
4768 {
4769
4770     return ( a.high>>48 ) & 0x7FFF;
4771
4772 }
4773
4774 /*----------------------------------------------------------------------------
4775 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4776 *----------------------------------------------------------------------------*/
4777
4778 static inline bool extractFloat128Sign(float128 a)
4779 {
4780     return a.high >> 63;
4781 }
4782
4783 /*----------------------------------------------------------------------------
4784 | Normalizes the subnormal quadruple-precision floating-point value
4785 | represented by the denormalized significand formed by the concatenation of
4786 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4787 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4788 | significand are stored at the location pointed to by `zSig0Ptr', and the
4789 | least significant 64 bits of the normalized significand are stored at the
4790 | location pointed to by `zSig1Ptr'.
4791 *----------------------------------------------------------------------------*/
4792
4793 static void
4794  normalizeFloat128Subnormal(
4795      uint64_t aSig0,
4796      uint64_t aSig1,
4797      int32_t *zExpPtr,
4798      uint64_t *zSig0Ptr,
4799      uint64_t *zSig1Ptr
4800  )
4801 {
4802     int8_t shiftCount;
4803
4804     if ( aSig0 == 0 ) {
4805         shiftCount = clz64(aSig1) - 15;
4806         if ( shiftCount < 0 ) {
4807             *zSig0Ptr = aSig1>>( - shiftCount );
4808             *zSig1Ptr = aSig1<<( shiftCount & 63 );
4809         }
4810         else {
4811             *zSig0Ptr = aSig1<<shiftCount;
4812             *zSig1Ptr = 0;
4813         }
4814         *zExpPtr = - shiftCount - 63;
4815     }
4816     else {
4817         shiftCount = clz64(aSig0) - 15;
4818         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4819         *zExpPtr = 1 - shiftCount;
4820     }
4821
4822 }
4823
4824 /*----------------------------------------------------------------------------
4825 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4826 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4827 | floating-point value, returning the result.  After being shifted into the
4828 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4829 | added together to form the most significant 32 bits of the result.  This
4830 | means that any integer portion of `zSig0' will be added into the exponent.
4831 | Since a properly normalized significand will have an integer portion equal
4832 | to 1, the `zExp' input should be 1 less than the desired result exponent
4833 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4834 | significand.
4835 *----------------------------------------------------------------------------*/
4836
4837 static inline float128
4838 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4839 {
4840     float128 z;
4841
4842     z.low = zSig1;
4843     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4844     return z;
4845 }
4846
4847 /*----------------------------------------------------------------------------
4848 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4849 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4850 | and `zSig2', and returns the proper quadruple-precision floating-point value
4851 | corresponding to the abstract input.  Ordinarily, the abstract value is
4852 | simply rounded and packed into the quadruple-precision format, with the
4853 | inexact exception raised if the abstract input cannot be represented
4854 | exactly.  However, if the abstract value is too large, the overflow and
4855 | inexact exceptions are raised and an infinity or maximal finite value is
4856 | returned.  If the abstract value is too small, the input value is rounded to
4857 | a subnormal number, and the underflow and inexact exceptions are raised if
4858 | the abstract input cannot be represented exactly as a subnormal quadruple-
4859 | precision floating-point number.
4860 |     The input significand must be normalized or smaller.  If the input
4861 | significand is not normalized, `zExp' must be 0; in that case, the result
4862 | returned is a subnormal number, and it must not require rounding.  In the
4863 | usual case that the input significand is normalized, `zExp' must be 1 less
4864 | than the ``true'' floating-point exponent.  The handling of underflow and
4865 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4866 *----------------------------------------------------------------------------*/
4867
4868 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4869                                      uint64_t zSig0, uint64_t zSig1,
4870                                      uint64_t zSig2, float_status *status)
4871 {
4872     int8_t roundingMode;
4873     bool roundNearestEven, increment, isTiny;
4874
4875     roundingMode = status->float_rounding_mode;
4876     roundNearestEven = ( roundingMode == float_round_nearest_even );
4877     switch (roundingMode) {
4878     case float_round_nearest_even:
4879     case float_round_ties_away:
4880         increment = ((int64_t)zSig2 < 0);
4881         break;
4882     case float_round_to_zero:
4883         increment = 0;
4884         break;
4885     case float_round_up:
4886         increment = !zSign && zSig2;
4887         break;
4888     case float_round_down:
4889         increment = zSign && zSig2;
4890         break;
4891     case float_round_to_odd:
4892         increment = !(zSig1 & 0x1) && zSig2;
4893         break;
4894     default:
4895         abort();
4896     }
4897     if ( 0x7FFD <= (uint32_t) zExp ) {
4898         if (    ( 0x7FFD < zExp )
4899              || (    ( zExp == 0x7FFD )
4900                   && eq128(
4901                          UINT64_C(0x0001FFFFFFFFFFFF),
4902                          UINT64_C(0xFFFFFFFFFFFFFFFF),
4903                          zSig0,
4904                          zSig1
4905                      )
4906                   && increment
4907                 )
4908            ) {
4909             float_raise(float_flag_overflow | float_flag_inexact, status);
4910             if (    ( roundingMode == float_round_to_zero )
4911                  || ( zSign && ( roundingMode == float_round_up ) )
4912                  || ( ! zSign && ( roundingMode == float_round_down ) )
4913                  || (roundingMode == float_round_to_odd)
4914                ) {
4915                 return
4916                     packFloat128(
4917                         zSign,
4918                         0x7FFE,
4919                         UINT64_C(0x0000FFFFFFFFFFFF),
4920                         UINT64_C(0xFFFFFFFFFFFFFFFF)
4921                     );
4922             }
4923             return packFloat128( zSign, 0x7FFF, 0, 0 );
4924         }
4925         if ( zExp < 0 ) {
4926             if (status->flush_to_zero) {
4927                 float_raise(float_flag_output_denormal, status);
4928                 return packFloat128(zSign, 0, 0, 0);
4929             }
4930             isTiny = status->tininess_before_rounding
4931                   || (zExp < -1)
4932                   || !increment
4933                   || lt128(zSig0, zSig1,
4934                            UINT64_C(0x0001FFFFFFFFFFFF),
4935                            UINT64_C(0xFFFFFFFFFFFFFFFF));
4936             shift128ExtraRightJamming(
4937                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4938             zExp = 0;
4939             if (isTiny && zSig2) {
4940                 float_raise(float_flag_underflow, status);
4941             }
4942             switch (roundingMode) {
4943             case float_round_nearest_even:
4944             case float_round_ties_away:
4945                 increment = ((int64_t)zSig2 < 0);
4946                 break;
4947             case float_round_to_zero:
4948                 increment = 0;
4949                 break;
4950             case float_round_up:
4951                 increment = !zSign && zSig2;
4952                 break;
4953             case float_round_down:
4954                 increment = zSign && zSig2;
4955                 break;
4956             case float_round_to_odd:
4957                 increment = !(zSig1 & 0x1) && zSig2;
4958                 break;
4959             default:
4960                 abort();
4961             }
4962         }
4963     }
4964     if (zSig2) {
4965         float_raise(float_flag_inexact, status);
4966     }
4967     if ( increment ) {
4968         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4969         if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4970             zSig1 &= ~1;
4971         }
4972     }
4973     else {
4974         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4975     }
4976     return packFloat128( zSign, zExp, zSig0, zSig1 );
4977
4978 }
4979
4980 /*----------------------------------------------------------------------------
4981 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4982 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4983 | returns the proper quadruple-precision floating-point value corresponding
4984 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4985 | except that the input significand has fewer bits and does not have to be
4986 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4987 | point exponent.
4988 *----------------------------------------------------------------------------*/
4989
4990 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4991                                               uint64_t zSig0, uint64_t zSig1,
4992                                               float_status *status)
4993 {
4994     int8_t shiftCount;
4995     uint64_t zSig2;
4996
4997     if ( zSig0 == 0 ) {
4998         zSig0 = zSig1;
4999         zSig1 = 0;
5000         zExp -= 64;
5001     }
5002     shiftCount = clz64(zSig0) - 15;
5003     if ( 0 <= shiftCount ) {
5004         zSig2 = 0;
5005         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5006     }
5007     else {
5008         shift128ExtraRightJamming(
5009             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5010     }
5011     zExp -= shiftCount;
5012     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
5013
5014 }
5015
5016
5017 /*----------------------------------------------------------------------------
5018 | Returns the result of converting the 32-bit two's complement integer `a'
5019 | to the extended double-precision floating-point format.  The conversion
5020 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5021 | Arithmetic.
5022 *----------------------------------------------------------------------------*/
5023
5024 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5025 {
5026     bool zSign;
5027     uint32_t absA;
5028     int8_t shiftCount;
5029     uint64_t zSig;
5030
5031     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5032     zSign = ( a < 0 );
5033     absA = zSign ? - a : a;
5034     shiftCount = clz32(absA) + 32;
5035     zSig = absA;
5036     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5037
5038 }
5039
5040 /*----------------------------------------------------------------------------
5041 | Returns the result of converting the 32-bit two's complement integer `a' to
5042 | the quadruple-precision floating-point format.  The conversion is performed
5043 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5044 *----------------------------------------------------------------------------*/
5045
5046 float128 int32_to_float128(int32_t a, float_status *status)
5047 {
5048     bool zSign;
5049     uint32_t absA;
5050     int8_t shiftCount;
5051     uint64_t zSig0;
5052
5053     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5054     zSign = ( a < 0 );
5055     absA = zSign ? - a : a;
5056     shiftCount = clz32(absA) + 17;
5057     zSig0 = absA;
5058     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
5059
5060 }
5061
5062 /*----------------------------------------------------------------------------
5063 | Returns the result of converting the 64-bit two's complement integer `a'
5064 | to the extended double-precision floating-point format.  The conversion
5065 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5066 | Arithmetic.
5067 *----------------------------------------------------------------------------*/
5068
5069 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5070 {
5071     bool zSign;
5072     uint64_t absA;
5073     int8_t shiftCount;
5074
5075     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5076     zSign = ( a < 0 );
5077     absA = zSign ? - a : a;
5078     shiftCount = clz64(absA);
5079     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5080
5081 }
5082
5083 /*----------------------------------------------------------------------------
5084 | Returns the result of converting the 64-bit two's complement integer `a' to
5085 | the quadruple-precision floating-point format.  The conversion is performed
5086 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5087 *----------------------------------------------------------------------------*/
5088
5089 float128 int64_to_float128(int64_t a, float_status *status)
5090 {
5091     bool zSign;
5092     uint64_t absA;
5093     int8_t shiftCount;
5094     int32_t zExp;
5095     uint64_t zSig0, zSig1;
5096
5097     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5098     zSign = ( a < 0 );
5099     absA = zSign ? - a : a;
5100     shiftCount = clz64(absA) + 49;
5101     zExp = 0x406E - shiftCount;
5102     if ( 64 <= shiftCount ) {
5103         zSig1 = 0;
5104         zSig0 = absA;
5105         shiftCount -= 64;
5106     }
5107     else {
5108         zSig1 = absA;
5109         zSig0 = 0;
5110     }
5111     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5112     return packFloat128( zSign, zExp, zSig0, zSig1 );
5113
5114 }
5115
5116 /*----------------------------------------------------------------------------
5117 | Returns the result of converting the 64-bit unsigned integer `a'
5118 | to the quadruple-precision floating-point format.  The conversion is performed
5119 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5120 *----------------------------------------------------------------------------*/
5121
5122 float128 uint64_to_float128(uint64_t a, float_status *status)
5123 {
5124     if (a == 0) {
5125         return float128_zero;
5126     }
5127     return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
5128 }
5129
5130 /*----------------------------------------------------------------------------
5131 | Returns the result of converting the single-precision floating-point value
5132 | `a' to the extended double-precision floating-point format.  The conversion
5133 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5134 | Arithmetic.
5135 *----------------------------------------------------------------------------*/
5136
5137 floatx80 float32_to_floatx80(float32 a, float_status *status)
5138 {
5139     bool aSign;
5140     int aExp;
5141     uint32_t aSig;
5142
5143     a = float32_squash_input_denormal(a, status);
5144     aSig = extractFloat32Frac( a );
5145     aExp = extractFloat32Exp( a );
5146     aSign = extractFloat32Sign( a );
5147     if ( aExp == 0xFF ) {
5148         if (aSig) {
5149             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5150                                                status);
5151             return floatx80_silence_nan(res, status);
5152         }
5153         return packFloatx80(aSign,
5154                             floatx80_infinity_high,
5155                             floatx80_infinity_low);
5156     }
5157     if ( aExp == 0 ) {
5158         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5159         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5160     }
5161     aSig |= 0x00800000;
5162     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5163
5164 }
5165
5166 /*----------------------------------------------------------------------------
5167 | Returns the remainder of the single-precision floating-point value `a'
5168 | with respect to the corresponding value `b'.  The operation is performed
5169 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5170 *----------------------------------------------------------------------------*/
5171
5172 float32 float32_rem(float32 a, float32 b, float_status *status)
5173 {
5174     bool aSign, zSign;
5175     int aExp, bExp, expDiff;
5176     uint32_t aSig, bSig;
5177     uint32_t q;
5178     uint64_t aSig64, bSig64, q64;
5179     uint32_t alternateASig;
5180     int32_t sigMean;
5181     a = float32_squash_input_denormal(a, status);
5182     b = float32_squash_input_denormal(b, status);
5183
5184     aSig = extractFloat32Frac( a );
5185     aExp = extractFloat32Exp( a );
5186     aSign = extractFloat32Sign( a );
5187     bSig = extractFloat32Frac( b );
5188     bExp = extractFloat32Exp( b );
5189     if ( aExp == 0xFF ) {
5190         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
5191             return propagateFloat32NaN(a, b, status);
5192         }
5193         float_raise(float_flag_invalid, status);
5194         return float32_default_nan(status);
5195     }
5196     if ( bExp == 0xFF ) {
5197         if (bSig) {
5198             return propagateFloat32NaN(a, b, status);
5199         }
5200         return a;
5201     }
5202     if ( bExp == 0 ) {
5203         if ( bSig == 0 ) {
5204             float_raise(float_flag_invalid, status);
5205             return float32_default_nan(status);
5206         }
5207         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5208     }
5209     if ( aExp == 0 ) {
5210         if ( aSig == 0 ) return a;
5211         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5212     }
5213     expDiff = aExp - bExp;
5214     aSig |= 0x00800000;
5215     bSig |= 0x00800000;
5216     if ( expDiff < 32 ) {
5217         aSig <<= 8;
5218         bSig <<= 8;
5219         if ( expDiff < 0 ) {
5220             if ( expDiff < -1 ) return a;
5221             aSig >>= 1;
5222         }
5223         q = ( bSig <= aSig );
5224         if ( q ) aSig -= bSig;
5225         if ( 0 < expDiff ) {
5226             q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5227             q >>= 32 - expDiff;
5228             bSig >>= 2;
5229             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5230         }
5231         else {
5232             aSig >>= 2;
5233             bSig >>= 2;
5234         }
5235     }
5236     else {
5237         if ( bSig <= aSig ) aSig -= bSig;
5238         aSig64 = ( (uint64_t) aSig )<<40;
5239         bSig64 = ( (uint64_t) bSig )<<40;
5240         expDiff -= 64;
5241         while ( 0 < expDiff ) {
5242             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5243             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5244             aSig64 = - ( ( bSig * q64 )<<38 );
5245             expDiff -= 62;
5246         }
5247         expDiff += 64;
5248         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5249         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5250         q = q64>>( 64 - expDiff );
5251         bSig <<= 6;
5252         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5253     }
5254     do {
5255         alternateASig = aSig;
5256         ++q;
5257         aSig -= bSig;
5258     } while ( 0 <= (int32_t) aSig );
5259     sigMean = aSig + alternateASig;
5260     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5261         aSig = alternateASig;
5262     }
5263     zSign = ( (int32_t) aSig < 0 );
5264     if ( zSign ) aSig = - aSig;
5265     return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5266 }
5267
5268
5269
5270 /*----------------------------------------------------------------------------
5271 | Returns the binary exponential of the single-precision floating-point value
5272 | `a'. The operation is performed according to the IEC/IEEE Standard for
5273 | Binary Floating-Point Arithmetic.
5274 |
5275 | Uses the following identities:
5276 |
5277 | 1. -------------------------------------------------------------------------
5278 |      x    x*ln(2)
5279 |     2  = e
5280 |
5281 | 2. -------------------------------------------------------------------------
5282 |                      2     3     4     5           n
5283 |      x        x     x     x     x     x           x
5284 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5285 |               1!    2!    3!    4!    5!          n!
5286 *----------------------------------------------------------------------------*/
5287
5288 static const float64 float32_exp2_coefficients[15] =
5289 {
5290     const_float64( 0x3ff0000000000000ll ), /*  1 */
5291     const_float64( 0x3fe0000000000000ll ), /*  2 */
5292     const_float64( 0x3fc5555555555555ll ), /*  3 */
5293     const_float64( 0x3fa5555555555555ll ), /*  4 */
5294     const_float64( 0x3f81111111111111ll ), /*  5 */
5295     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5296     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5297     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5298     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5299     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5300     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5301     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5302     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5303     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5304     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5305 };
5306
5307 float32 float32_exp2(float32 a, float_status *status)
5308 {
5309     bool aSign;
5310     int aExp;
5311     uint32_t aSig;
5312     float64 r, x, xn;
5313     int i;
5314     a = float32_squash_input_denormal(a, status);
5315
5316     aSig = extractFloat32Frac( a );
5317     aExp = extractFloat32Exp( a );
5318     aSign = extractFloat32Sign( a );
5319
5320     if ( aExp == 0xFF) {
5321         if (aSig) {
5322             return propagateFloat32NaN(a, float32_zero, status);
5323         }
5324         return (aSign) ? float32_zero : a;
5325     }
5326     if (aExp == 0) {
5327         if (aSig == 0) return float32_one;
5328     }
5329
5330     float_raise(float_flag_inexact, status);
5331
5332     /* ******************************* */
5333     /* using float64 for approximation */
5334     /* ******************************* */
5335     x = float32_to_float64(a, status);
5336     x = float64_mul(x, float64_ln2, status);
5337
5338     xn = x;
5339     r = float64_one;
5340     for (i = 0 ; i < 15 ; i++) {
5341         float64 f;
5342
5343         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5344         r = float64_add(r, f, status);
5345
5346         xn = float64_mul(xn, x, status);
5347     }
5348
5349     return float64_to_float32(r, status);
5350 }
5351
5352 /*----------------------------------------------------------------------------
5353 | Returns the binary log of the single-precision floating-point value `a'.
5354 | The operation is performed according to the IEC/IEEE Standard for Binary
5355 | Floating-Point Arithmetic.
5356 *----------------------------------------------------------------------------*/
5357 float32 float32_log2(float32 a, float_status *status)
5358 {
5359     bool aSign, zSign;
5360     int aExp;
5361     uint32_t aSig, zSig, i;
5362
5363     a = float32_squash_input_denormal(a, status);
5364     aSig = extractFloat32Frac( a );
5365     aExp = extractFloat32Exp( a );
5366     aSign = extractFloat32Sign( a );
5367
5368     if ( aExp == 0 ) {
5369         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5370         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5371     }
5372     if ( aSign ) {
5373         float_raise(float_flag_invalid, status);
5374         return float32_default_nan(status);
5375     }
5376     if ( aExp == 0xFF ) {
5377         if (aSig) {
5378             return propagateFloat32NaN(a, float32_zero, status);
5379         }
5380         return a;
5381     }
5382
5383     aExp -= 0x7F;
5384     aSig |= 0x00800000;
5385     zSign = aExp < 0;
5386     zSig = aExp << 23;
5387
5388     for (i = 1 << 22; i > 0; i >>= 1) {
5389         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5390         if ( aSig & 0x01000000 ) {
5391             aSig >>= 1;
5392             zSig |= i;
5393         }
5394     }
5395
5396     if ( zSign )
5397         zSig = -zSig;
5398
5399     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5400 }
5401
5402 /*----------------------------------------------------------------------------
5403 | Returns the result of converting the double-precision floating-point value
5404 | `a' to the extended double-precision floating-point format.  The conversion
5405 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5406 | Arithmetic.
5407 *----------------------------------------------------------------------------*/
5408
5409 floatx80 float64_to_floatx80(float64 a, float_status *status)
5410 {
5411     bool aSign;
5412     int aExp;
5413     uint64_t aSig;
5414
5415     a = float64_squash_input_denormal(a, status);
5416     aSig = extractFloat64Frac( a );
5417     aExp = extractFloat64Exp( a );
5418     aSign = extractFloat64Sign( a );
5419     if ( aExp == 0x7FF ) {
5420         if (aSig) {
5421             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5422                                                status);
5423             return floatx80_silence_nan(res, status);
5424         }
5425         return packFloatx80(aSign,
5426                             floatx80_infinity_high,
5427                             floatx80_infinity_low);
5428     }
5429     if ( aExp == 0 ) {
5430         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5431         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5432     }
5433     return
5434         packFloatx80(
5435             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5436
5437 }
5438
5439 /*----------------------------------------------------------------------------
5440 | Returns the remainder of the double-precision floating-point value `a'
5441 | with respect to the corresponding value `b'.  The operation is performed
5442 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5443 *----------------------------------------------------------------------------*/
5444
5445 float64 float64_rem(float64 a, float64 b, float_status *status)
5446 {
5447     bool aSign, zSign;
5448     int aExp, bExp, expDiff;
5449     uint64_t aSig, bSig;
5450     uint64_t q, alternateASig;
5451     int64_t sigMean;
5452
5453     a = float64_squash_input_denormal(a, status);
5454     b = float64_squash_input_denormal(b, status);
5455     aSig = extractFloat64Frac( a );
5456     aExp = extractFloat64Exp( a );
5457     aSign = extractFloat64Sign( a );
5458     bSig = extractFloat64Frac( b );
5459     bExp = extractFloat64Exp( b );
5460     if ( aExp == 0x7FF ) {
5461         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
5462             return propagateFloat64NaN(a, b, status);
5463         }
5464         float_raise(float_flag_invalid, status);
5465         return float64_default_nan(status);
5466     }
5467     if ( bExp == 0x7FF ) {
5468         if (bSig) {
5469             return propagateFloat64NaN(a, b, status);
5470         }
5471         return a;
5472     }
5473     if ( bExp == 0 ) {
5474         if ( bSig == 0 ) {
5475             float_raise(float_flag_invalid, status);
5476             return float64_default_nan(status);
5477         }
5478         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5479     }
5480     if ( aExp == 0 ) {
5481         if ( aSig == 0 ) return a;
5482         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5483     }
5484     expDiff = aExp - bExp;
5485     aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5486     bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
5487     if ( expDiff < 0 ) {
5488         if ( expDiff < -1 ) return a;
5489         aSig >>= 1;
5490     }
5491     q = ( bSig <= aSig );
5492     if ( q ) aSig -= bSig;
5493     expDiff -= 64;
5494     while ( 0 < expDiff ) {
5495         q = estimateDiv128To64( aSig, 0, bSig );
5496         q = ( 2 < q ) ? q - 2 : 0;
5497         aSig = - ( ( bSig>>2 ) * q );
5498         expDiff -= 62;
5499     }
5500     expDiff += 64;
5501     if ( 0 < expDiff ) {
5502         q = estimateDiv128To64( aSig, 0, bSig );
5503         q = ( 2 < q ) ? q - 2 : 0;
5504         q >>= 64 - expDiff;
5505         bSig >>= 2;
5506         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5507     }
5508     else {
5509         aSig >>= 2;
5510         bSig >>= 2;
5511     }
5512     do {
5513         alternateASig = aSig;
5514         ++q;
5515         aSig -= bSig;
5516     } while ( 0 <= (int64_t) aSig );
5517     sigMean = aSig + alternateASig;
5518     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5519         aSig = alternateASig;
5520     }
5521     zSign = ( (int64_t) aSig < 0 );
5522     if ( zSign ) aSig = - aSig;
5523     return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
5524
5525 }
5526
5527 /*----------------------------------------------------------------------------
5528 | Returns the binary log of the double-precision floating-point value `a'.
5529 | The operation is performed according to the IEC/IEEE Standard for Binary
5530 | Floating-Point Arithmetic.
5531 *----------------------------------------------------------------------------*/
5532 float64 float64_log2(float64 a, float_status *status)
5533 {
5534     bool aSign, zSign;
5535     int aExp;
5536     uint64_t aSig, aSig0, aSig1, zSig, i;
5537     a = float64_squash_input_denormal(a, status);
5538
5539     aSig = extractFloat64Frac( a );
5540     aExp = extractFloat64Exp( a );
5541     aSign = extractFloat64Sign( a );
5542
5543     if ( aExp == 0 ) {
5544         if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5545         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5546     }
5547     if ( aSign ) {
5548         float_raise(float_flag_invalid, status);
5549         return float64_default_nan(status);
5550     }
5551     if ( aExp == 0x7FF ) {
5552         if (aSig) {
5553             return propagateFloat64NaN(a, float64_zero, status);
5554         }
5555         return a;
5556     }
5557
5558     aExp -= 0x3FF;
5559     aSig |= UINT64_C(0x0010000000000000);
5560     zSign = aExp < 0;
5561     zSig = (uint64_t)aExp << 52;
5562     for (i = 1LL << 51; i > 0; i >>= 1) {
5563         mul64To128( aSig, aSig, &aSig0, &aSig1 );
5564         aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5565         if ( aSig & UINT64_C(0x0020000000000000) ) {
5566             aSig >>= 1;
5567             zSig |= i;
5568         }
5569     }
5570
5571     if ( zSign )
5572         zSig = -zSig;
5573     return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5574 }
5575
5576 /*----------------------------------------------------------------------------
5577 | Returns the result of converting the extended double-precision floating-
5578 | point value `a' to the 32-bit two's complement integer format.  The
5579 | conversion is performed according to the IEC/IEEE Standard for Binary
5580 | Floating-Point Arithmetic---which means in particular that the conversion
5581 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5582 | largest positive integer is returned.  Otherwise, if the conversion
5583 | overflows, the largest integer with the same sign as `a' is returned.
5584 *----------------------------------------------------------------------------*/
5585
5586 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5587 {
5588     bool aSign;
5589     int32_t aExp, shiftCount;
5590     uint64_t aSig;
5591
5592     if (floatx80_invalid_encoding(a)) {
5593         float_raise(float_flag_invalid, status);
5594         return 1 << 31;
5595     }
5596     aSig = extractFloatx80Frac( a );
5597     aExp = extractFloatx80Exp( a );
5598     aSign = extractFloatx80Sign( a );
5599     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5600     shiftCount = 0x4037 - aExp;
5601     if ( shiftCount <= 0 ) shiftCount = 1;
5602     shift64RightJamming( aSig, shiftCount, &aSig );
5603     return roundAndPackInt32(aSign, aSig, status);
5604
5605 }
5606
5607 /*----------------------------------------------------------------------------
5608 | Returns the result of converting the extended double-precision floating-
5609 | point value `a' to the 32-bit two's complement integer format.  The
5610 | conversion is performed according to the IEC/IEEE Standard for Binary
5611 | Floating-Point Arithmetic, except that the conversion is always rounded
5612 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5613 | Otherwise, if the conversion overflows, the largest integer with the same
5614 | sign as `a' is returned.
5615 *----------------------------------------------------------------------------*/
5616
5617 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5618 {
5619     bool aSign;
5620     int32_t aExp, shiftCount;
5621     uint64_t aSig, savedASig;
5622     int32_t z;
5623
5624     if (floatx80_invalid_encoding(a)) {
5625         float_raise(float_flag_invalid, status);
5626         return 1 << 31;
5627     }
5628     aSig = extractFloatx80Frac( a );
5629     aExp = extractFloatx80Exp( a );
5630     aSign = extractFloatx80Sign( a );
5631     if ( 0x401E < aExp ) {
5632         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5633         goto invalid;
5634     }
5635     else if ( aExp < 0x3FFF ) {
5636         if (aExp || aSig) {
5637             float_raise(float_flag_inexact, status);
5638         }
5639         return 0;
5640     }
5641     shiftCount = 0x403E - aExp;
5642     savedASig = aSig;
5643     aSig >>= shiftCount;
5644     z = aSig;
5645     if ( aSign ) z = - z;
5646     if ( ( z < 0 ) ^ aSign ) {
5647  invalid:
5648         float_raise(float_flag_invalid, status);
5649         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5650     }
5651     if ( ( aSig<<shiftCount ) != savedASig ) {
5652         float_raise(float_flag_inexact, status);
5653     }
5654     return z;
5655
5656 }
5657
5658 /*----------------------------------------------------------------------------
5659 | Returns the result of converting the extended double-precision floating-
5660 | point value `a' to the 64-bit two's complement integer format.  The
5661 | conversion is performed according to the IEC/IEEE Standard for Binary
5662 | Floating-Point Arithmetic---which means in particular that the conversion
5663 | is rounded according to the current rounding mode.  If `a' is a NaN,
5664 | the largest positive integer is returned.  Otherwise, if the conversion
5665 | overflows, the largest integer with the same sign as `a' is returned.
5666 *----------------------------------------------------------------------------*/
5667
5668 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5669 {
5670     bool aSign;
5671     int32_t aExp, shiftCount;
5672     uint64_t aSig, aSigExtra;
5673
5674     if (floatx80_invalid_encoding(a)) {
5675         float_raise(float_flag_invalid, status);
5676         return 1ULL << 63;
5677     }
5678     aSig = extractFloatx80Frac( a );
5679     aExp = extractFloatx80Exp( a );
5680     aSign = extractFloatx80Sign( a );
5681     shiftCount = 0x403E - aExp;
5682     if ( shiftCount <= 0 ) {
5683         if ( shiftCount ) {
5684             float_raise(float_flag_invalid, status);
5685             if (!aSign || floatx80_is_any_nan(a)) {
5686                 return INT64_MAX;
5687             }
5688             return INT64_MIN;
5689         }
5690         aSigExtra = 0;
5691     }
5692     else {
5693         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5694     }
5695     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5696
5697 }
5698
5699 /*----------------------------------------------------------------------------
5700 | Returns the result of converting the extended double-precision floating-
5701 | point value `a' to the 64-bit two's complement integer format.  The
5702 | conversion is performed according to the IEC/IEEE Standard for Binary
5703 | Floating-Point Arithmetic, except that the conversion is always rounded
5704 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5705 | Otherwise, if the conversion overflows, the largest integer with the same
5706 | sign as `a' is returned.
5707 *----------------------------------------------------------------------------*/
5708
5709 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5710 {
5711     bool aSign;
5712     int32_t aExp, shiftCount;
5713     uint64_t aSig;
5714     int64_t z;
5715
5716     if (floatx80_invalid_encoding(a)) {
5717         float_raise(float_flag_invalid, status);
5718         return 1ULL << 63;
5719     }
5720     aSig = extractFloatx80Frac( a );
5721     aExp = extractFloatx80Exp( a );
5722     aSign = extractFloatx80Sign( a );
5723     shiftCount = aExp - 0x403E;
5724     if ( 0 <= shiftCount ) {
5725         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5726         if ( ( a.high != 0xC03E ) || aSig ) {
5727             float_raise(float_flag_invalid, status);
5728             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5729                 return INT64_MAX;
5730             }
5731         }
5732         return INT64_MIN;
5733     }
5734     else if ( aExp < 0x3FFF ) {
5735         if (aExp | aSig) {
5736             float_raise(float_flag_inexact, status);
5737         }
5738         return 0;
5739     }
5740     z = aSig>>( - shiftCount );
5741     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5742         float_raise(float_flag_inexact, status);
5743     }
5744     if ( aSign ) z = - z;
5745     return z;
5746
5747 }
5748
5749 /*----------------------------------------------------------------------------
5750 | Returns the result of converting the extended double-precision floating-
5751 | point value `a' to the single-precision floating-point format.  The
5752 | conversion is performed according to the IEC/IEEE Standard for Binary
5753 | Floating-Point Arithmetic.
5754 *----------------------------------------------------------------------------*/
5755
5756 float32 floatx80_to_float32(floatx80 a, float_status *status)
5757 {
5758     bool aSign;
5759     int32_t aExp;
5760     uint64_t aSig;
5761
5762     if (floatx80_invalid_encoding(a)) {
5763         float_raise(float_flag_invalid, status);
5764         return float32_default_nan(status);
5765     }
5766     aSig = extractFloatx80Frac( a );
5767     aExp = extractFloatx80Exp( a );
5768     aSign = extractFloatx80Sign( a );
5769     if ( aExp == 0x7FFF ) {
5770         if ( (uint64_t) ( aSig<<1 ) ) {
5771             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5772                                              status);
5773             return float32_silence_nan(res, status);
5774         }
5775         return packFloat32( aSign, 0xFF, 0 );
5776     }
5777     shift64RightJamming( aSig, 33, &aSig );
5778     if ( aExp || aSig ) aExp -= 0x3F81;
5779     return roundAndPackFloat32(aSign, aExp, aSig, status);
5780
5781 }
5782
5783 /*----------------------------------------------------------------------------
5784 | Returns the result of converting the extended double-precision floating-
5785 | point value `a' to the double-precision floating-point format.  The
5786 | conversion is performed according to the IEC/IEEE Standard for Binary
5787 | Floating-Point Arithmetic.
5788 *----------------------------------------------------------------------------*/
5789
5790 float64 floatx80_to_float64(floatx80 a, float_status *status)
5791 {
5792     bool aSign;
5793     int32_t aExp;
5794     uint64_t aSig, zSig;
5795
5796     if (floatx80_invalid_encoding(a)) {
5797         float_raise(float_flag_invalid, status);
5798         return float64_default_nan(status);
5799     }
5800     aSig = extractFloatx80Frac( a );
5801     aExp = extractFloatx80Exp( a );
5802     aSign = extractFloatx80Sign( a );
5803     if ( aExp == 0x7FFF ) {
5804         if ( (uint64_t) ( aSig<<1 ) ) {
5805             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5806                                              status);
5807             return float64_silence_nan(res, status);
5808         }
5809         return packFloat64( aSign, 0x7FF, 0 );
5810     }
5811     shift64RightJamming( aSig, 1, &zSig );
5812     if ( aExp || aSig ) aExp -= 0x3C01;
5813     return roundAndPackFloat64(aSign, aExp, zSig, status);
5814
5815 }
5816
5817 /*----------------------------------------------------------------------------
5818 | Returns the result of converting the extended double-precision floating-
5819 | point value `a' to the quadruple-precision floating-point format.  The
5820 | conversion is performed according to the IEC/IEEE Standard for Binary
5821 | Floating-Point Arithmetic.
5822 *----------------------------------------------------------------------------*/
5823
5824 float128 floatx80_to_float128(floatx80 a, float_status *status)
5825 {
5826     bool aSign;
5827     int aExp;
5828     uint64_t aSig, zSig0, zSig1;
5829
5830     if (floatx80_invalid_encoding(a)) {
5831         float_raise(float_flag_invalid, status);
5832         return float128_default_nan(status);
5833     }
5834     aSig = extractFloatx80Frac( a );
5835     aExp = extractFloatx80Exp( a );
5836     aSign = extractFloatx80Sign( a );
5837     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5838         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5839                                            status);
5840         return float128_silence_nan(res, status);
5841     }
5842     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5843     return packFloat128( aSign, aExp, zSig0, zSig1 );
5844
5845 }
5846
5847 /*----------------------------------------------------------------------------
5848 | Rounds the extended double-precision floating-point value `a'
5849 | to the precision provided by floatx80_rounding_precision and returns the
5850 | result as an extended double-precision floating-point value.
5851 | The operation is performed according to the IEC/IEEE Standard for Binary
5852 | Floating-Point Arithmetic.
5853 *----------------------------------------------------------------------------*/
5854
5855 floatx80 floatx80_round(floatx80 a, float_status *status)
5856 {
5857     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5858                                 extractFloatx80Sign(a),
5859                                 extractFloatx80Exp(a),
5860                                 extractFloatx80Frac(a), 0, status);
5861 }
5862
5863 /*----------------------------------------------------------------------------
5864 | Rounds the extended double-precision floating-point value `a' to an integer,
5865 | and returns the result as an extended quadruple-precision floating-point
5866 | value.  The operation is performed according to the IEC/IEEE Standard for
5867 | Binary Floating-Point Arithmetic.
5868 *----------------------------------------------------------------------------*/
5869
5870 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5871 {
5872     bool aSign;
5873     int32_t aExp;
5874     uint64_t lastBitMask, roundBitsMask;
5875     floatx80 z;
5876
5877     if (floatx80_invalid_encoding(a)) {
5878         float_raise(float_flag_invalid, status);
5879         return floatx80_default_nan(status);
5880     }
5881     aExp = extractFloatx80Exp( a );
5882     if ( 0x403E <= aExp ) {
5883         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5884             return propagateFloatx80NaN(a, a, status);
5885         }
5886         return a;
5887     }
5888     if ( aExp < 0x3FFF ) {
5889         if (    ( aExp == 0 )
5890              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5891             return a;
5892         }
5893         float_raise(float_flag_inexact, status);
5894         aSign = extractFloatx80Sign( a );
5895         switch (status->float_rounding_mode) {
5896          case float_round_nearest_even:
5897             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5898                ) {
5899                 return
5900                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5901             }
5902             break;
5903         case float_round_ties_away:
5904             if (aExp == 0x3FFE) {
5905                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5906             }
5907             break;
5908          case float_round_down:
5909             return
5910                   aSign ?
5911                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5912                 : packFloatx80( 0, 0, 0 );
5913          case float_round_up:
5914             return
5915                   aSign ? packFloatx80( 1, 0, 0 )
5916                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5917
5918         case float_round_to_zero:
5919             break;
5920         default:
5921             g_assert_not_reached();
5922         }
5923         return packFloatx80( aSign, 0, 0 );
5924     }
5925     lastBitMask = 1;
5926     lastBitMask <<= 0x403E - aExp;
5927     roundBitsMask = lastBitMask - 1;
5928     z = a;
5929     switch (status->float_rounding_mode) {
5930     case float_round_nearest_even:
5931         z.low += lastBitMask>>1;
5932         if ((z.low & roundBitsMask) == 0) {
5933             z.low &= ~lastBitMask;
5934         }
5935         break;
5936     case float_round_ties_away:
5937         z.low += lastBitMask >> 1;
5938         break;
5939     case float_round_to_zero:
5940         break;
5941     case float_round_up:
5942         if (!extractFloatx80Sign(z)) {
5943             z.low += roundBitsMask;
5944         }
5945         break;
5946     case float_round_down:
5947         if (extractFloatx80Sign(z)) {
5948             z.low += roundBitsMask;
5949         }
5950         break;
5951     default:
5952         abort();
5953     }
5954     z.low &= ~ roundBitsMask;
5955     if ( z.low == 0 ) {
5956         ++z.high;
5957         z.low = UINT64_C(0x8000000000000000);
5958     }
5959     if (z.low != a.low) {
5960         float_raise(float_flag_inexact, status);
5961     }
5962     return z;
5963
5964 }
5965
5966 /*----------------------------------------------------------------------------
5967 | Returns the result of adding the absolute values of the extended double-
5968 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5969 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5970 | The addition is performed according to the IEC/IEEE Standard for Binary
5971 | Floating-Point Arithmetic.
5972 *----------------------------------------------------------------------------*/
5973
5974 static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
5975                                 float_status *status)
5976 {
5977     int32_t aExp, bExp, zExp;
5978     uint64_t aSig, bSig, zSig0, zSig1;
5979     int32_t expDiff;
5980
5981     aSig = extractFloatx80Frac( a );
5982     aExp = extractFloatx80Exp( a );
5983     bSig = extractFloatx80Frac( b );
5984     bExp = extractFloatx80Exp( b );
5985     expDiff = aExp - bExp;
5986     if ( 0 < expDiff ) {
5987         if ( aExp == 0x7FFF ) {
5988             if ((uint64_t)(aSig << 1)) {
5989                 return propagateFloatx80NaN(a, b, status);
5990             }
5991             return a;
5992         }
5993         if ( bExp == 0 ) --expDiff;
5994         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5995         zExp = aExp;
5996     }
5997     else if ( expDiff < 0 ) {
5998         if ( bExp == 0x7FFF ) {
5999             if ((uint64_t)(bSig << 1)) {
6000                 return propagateFloatx80NaN(a, b, status);
6001             }
6002             return packFloatx80(zSign,
6003                                 floatx80_infinity_high,
6004                                 floatx80_infinity_low);
6005         }
6006         if ( aExp == 0 ) ++expDiff;
6007         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6008         zExp = bExp;
6009     }
6010     else {
6011         if ( aExp == 0x7FFF ) {
6012             if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
6013                 return propagateFloatx80NaN(a, b, status);
6014             }
6015             return a;
6016         }
6017         zSig1 = 0;
6018         zSig0 = aSig + bSig;
6019         if ( aExp == 0 ) {
6020             if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
6021                 /* At least one of the values is a pseudo-denormal,
6022                  * and there is a carry out of the result.  */
6023                 zExp = 1;
6024                 goto shiftRight1;
6025             }
6026             if (zSig0 == 0) {
6027                 return packFloatx80(zSign, 0, 0);
6028             }
6029             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
6030             goto roundAndPack;
6031         }
6032         zExp = aExp;
6033         goto shiftRight1;
6034     }
6035     zSig0 = aSig + bSig;
6036     if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
6037  shiftRight1:
6038     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
6039     zSig0 |= UINT64_C(0x8000000000000000);
6040     ++zExp;
6041  roundAndPack:
6042     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6043                                 zSign, zExp, zSig0, zSig1, status);
6044 }
6045
6046 /*----------------------------------------------------------------------------
6047 | Returns the result of subtracting the absolute values of the extended
6048 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
6049 | difference is negated before being returned.  `zSign' is ignored if the
6050 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
6051 | Standard for Binary Floating-Point Arithmetic.
6052 *----------------------------------------------------------------------------*/
6053
6054 static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
6055                                 float_status *status)
6056 {
6057     int32_t aExp, bExp, zExp;
6058     uint64_t aSig, bSig, zSig0, zSig1;
6059     int32_t expDiff;
6060
6061     aSig = extractFloatx80Frac( a );
6062     aExp = extractFloatx80Exp( a );
6063     bSig = extractFloatx80Frac( b );
6064     bExp = extractFloatx80Exp( b );
6065     expDiff = aExp - bExp;
6066     if ( 0 < expDiff ) goto aExpBigger;
6067     if ( expDiff < 0 ) goto bExpBigger;
6068     if ( aExp == 0x7FFF ) {
6069         if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
6070             return propagateFloatx80NaN(a, b, status);
6071         }
6072         float_raise(float_flag_invalid, status);
6073         return floatx80_default_nan(status);
6074     }
6075     if ( aExp == 0 ) {
6076         aExp = 1;
6077         bExp = 1;
6078     }
6079     zSig1 = 0;
6080     if ( bSig < aSig ) goto aBigger;
6081     if ( aSig < bSig ) goto bBigger;
6082     return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
6083  bExpBigger:
6084     if ( bExp == 0x7FFF ) {
6085         if ((uint64_t)(bSig << 1)) {
6086             return propagateFloatx80NaN(a, b, status);
6087         }
6088         return packFloatx80(zSign ^ 1, floatx80_infinity_high,
6089                             floatx80_infinity_low);
6090     }
6091     if ( aExp == 0 ) ++expDiff;
6092     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6093  bBigger:
6094     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
6095     zExp = bExp;
6096     zSign ^= 1;
6097     goto normalizeRoundAndPack;
6098  aExpBigger:
6099     if ( aExp == 0x7FFF ) {
6100         if ((uint64_t)(aSig << 1)) {
6101             return propagateFloatx80NaN(a, b, status);
6102         }
6103         return a;
6104     }
6105     if ( bExp == 0 ) --expDiff;
6106     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6107  aBigger:
6108     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
6109     zExp = aExp;
6110  normalizeRoundAndPack:
6111     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6112                                          zSign, zExp, zSig0, zSig1, status);
6113 }
6114
6115 /*----------------------------------------------------------------------------
6116 | Returns the result of adding the extended double-precision floating-point
6117 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
6118 | Standard for Binary Floating-Point Arithmetic.
6119 *----------------------------------------------------------------------------*/
6120
6121 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
6122 {
6123     bool aSign, bSign;
6124
6125     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6126         float_raise(float_flag_invalid, status);
6127         return floatx80_default_nan(status);
6128     }
6129     aSign = extractFloatx80Sign( a );
6130     bSign = extractFloatx80Sign( b );
6131     if ( aSign == bSign ) {
6132         return addFloatx80Sigs(a, b, aSign, status);
6133     }
6134     else {
6135         return subFloatx80Sigs(a, b, aSign, status);
6136     }
6137
6138 }
6139
6140 /*----------------------------------------------------------------------------
6141 | Returns the result of subtracting the extended double-precision floating-
6142 | point values `a' and `b'.  The operation is performed according to the
6143 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6144 *----------------------------------------------------------------------------*/
6145
6146 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
6147 {
6148     bool aSign, bSign;
6149
6150     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6151         float_raise(float_flag_invalid, status);
6152         return floatx80_default_nan(status);
6153     }
6154     aSign = extractFloatx80Sign( a );
6155     bSign = extractFloatx80Sign( b );
6156     if ( aSign == bSign ) {
6157         return subFloatx80Sigs(a, b, aSign, status);
6158     }
6159     else {
6160         return addFloatx80Sigs(a, b, aSign, status);
6161     }
6162
6163 }
6164
6165 /*----------------------------------------------------------------------------
6166 | Returns the result of multiplying the extended double-precision floating-
6167 | point values `a' and `b'.  The operation is performed according to the
6168 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6169 *----------------------------------------------------------------------------*/
6170
6171 floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
6172 {
6173     bool aSign, bSign, zSign;
6174     int32_t aExp, bExp, zExp;
6175     uint64_t aSig, bSig, zSig0, zSig1;
6176
6177     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6178         float_raise(float_flag_invalid, status);
6179         return floatx80_default_nan(status);
6180     }
6181     aSig = extractFloatx80Frac( a );
6182     aExp = extractFloatx80Exp( a );
6183     aSign = extractFloatx80Sign( a );
6184     bSig = extractFloatx80Frac( b );
6185     bExp = extractFloatx80Exp( b );
6186     bSign = extractFloatx80Sign( b );
6187     zSign = aSign ^ bSign;
6188     if ( aExp == 0x7FFF ) {
6189         if (    (uint64_t) ( aSig<<1 )
6190              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6191             return propagateFloatx80NaN(a, b, status);
6192         }
6193         if ( ( bExp | bSig ) == 0 ) goto invalid;
6194         return packFloatx80(zSign, floatx80_infinity_high,
6195                                    floatx80_infinity_low);
6196     }
6197     if ( bExp == 0x7FFF ) {
6198         if ((uint64_t)(bSig << 1)) {
6199             return propagateFloatx80NaN(a, b, status);
6200         }
6201         if ( ( aExp | aSig ) == 0 ) {
6202  invalid:
6203             float_raise(float_flag_invalid, status);
6204             return floatx80_default_nan(status);
6205         }
6206         return packFloatx80(zSign, floatx80_infinity_high,
6207                                    floatx80_infinity_low);
6208     }
6209     if ( aExp == 0 ) {
6210         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6211         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6212     }
6213     if ( bExp == 0 ) {
6214         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6215         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6216     }
6217     zExp = aExp + bExp - 0x3FFE;
6218     mul64To128( aSig, bSig, &zSig0, &zSig1 );
6219     if ( 0 < (int64_t) zSig0 ) {
6220         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6221         --zExp;
6222     }
6223     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6224                                 zSign, zExp, zSig0, zSig1, status);
6225 }
6226
6227 /*----------------------------------------------------------------------------
6228 | Returns the result of dividing the extended double-precision floating-point
6229 | value `a' by the corresponding value `b'.  The operation is performed
6230 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6231 *----------------------------------------------------------------------------*/
6232
6233 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
6234 {
6235     bool aSign, bSign, zSign;
6236     int32_t aExp, bExp, zExp;
6237     uint64_t aSig, bSig, zSig0, zSig1;
6238     uint64_t rem0, rem1, rem2, term0, term1, term2;
6239
6240     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6241         float_raise(float_flag_invalid, status);
6242         return floatx80_default_nan(status);
6243     }
6244     aSig = extractFloatx80Frac( a );
6245     aExp = extractFloatx80Exp( a );
6246     aSign = extractFloatx80Sign( a );
6247     bSig = extractFloatx80Frac( b );
6248     bExp = extractFloatx80Exp( b );
6249     bSign = extractFloatx80Sign( b );
6250     zSign = aSign ^ bSign;
6251     if ( aExp == 0x7FFF ) {
6252         if ((uint64_t)(aSig << 1)) {
6253             return propagateFloatx80NaN(a, b, status);
6254         }
6255         if ( bExp == 0x7FFF ) {
6256             if ((uint64_t)(bSig << 1)) {
6257                 return propagateFloatx80NaN(a, b, status);
6258             }
6259             goto invalid;
6260         }
6261         return packFloatx80(zSign, floatx80_infinity_high,
6262                                    floatx80_infinity_low);
6263     }
6264     if ( bExp == 0x7FFF ) {
6265         if ((uint64_t)(bSig << 1)) {
6266             return propagateFloatx80NaN(a, b, status);
6267         }
6268         return packFloatx80( zSign, 0, 0 );
6269     }
6270     if ( bExp == 0 ) {
6271         if ( bSig == 0 ) {
6272             if ( ( aExp | aSig ) == 0 ) {
6273  invalid:
6274                 float_raise(float_flag_invalid, status);
6275                 return floatx80_default_nan(status);
6276             }
6277             float_raise(float_flag_divbyzero, status);
6278             return packFloatx80(zSign, floatx80_infinity_high,
6279                                        floatx80_infinity_low);
6280         }
6281         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6282     }
6283     if ( aExp == 0 ) {
6284         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6285         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6286     }
6287     zExp = aExp - bExp + 0x3FFE;
6288     rem1 = 0;
6289     if ( bSig <= aSig ) {
6290         shift128Right( aSig, 0, 1, &aSig, &rem1 );
6291         ++zExp;
6292     }
6293     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6294     mul64To128( bSig, zSig0, &term0, &term1 );
6295     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
6296     while ( (int64_t) rem0 < 0 ) {
6297         --zSig0;
6298         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6299     }
6300     zSig1 = estimateDiv128To64( rem1, 0, bSig );
6301     if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
6302         mul64To128( bSig, zSig1, &term1, &term2 );
6303         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6304         while ( (int64_t) rem1 < 0 ) {
6305             --zSig1;
6306             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6307         }
6308         zSig1 |= ( ( rem1 | rem2 ) != 0 );
6309     }
6310     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6311                                 zSign, zExp, zSig0, zSig1, status);
6312 }
6313
6314 /*----------------------------------------------------------------------------
6315 | Returns the remainder of the extended double-precision floating-point value
6316 | `a' with respect to the corresponding value `b'.  The operation is performed
6317 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6318 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6319 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6320 | the absolute value of the integer quotient.
6321 *----------------------------------------------------------------------------*/
6322
6323 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6324                          float_status *status)
6325 {
6326     bool aSign, zSign;
6327     int32_t aExp, bExp, expDiff, aExpOrig;
6328     uint64_t aSig0, aSig1, bSig;
6329     uint64_t q, term0, term1, alternateASig0, alternateASig1;
6330
6331     *quotient = 0;
6332     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6333         float_raise(float_flag_invalid, status);
6334         return floatx80_default_nan(status);
6335     }
6336     aSig0 = extractFloatx80Frac( a );
6337     aExpOrig = aExp = extractFloatx80Exp( a );
6338     aSign = extractFloatx80Sign( a );
6339     bSig = extractFloatx80Frac( b );
6340     bExp = extractFloatx80Exp( b );
6341     if ( aExp == 0x7FFF ) {
6342         if (    (uint64_t) ( aSig0<<1 )
6343              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6344             return propagateFloatx80NaN(a, b, status);
6345         }
6346         goto invalid;
6347     }
6348     if ( bExp == 0x7FFF ) {
6349         if ((uint64_t)(bSig << 1)) {
6350             return propagateFloatx80NaN(a, b, status);
6351         }
6352         if (aExp == 0 && aSig0 >> 63) {
6353             /*
6354              * Pseudo-denormal argument must be returned in normalized
6355              * form.
6356              */
6357             return packFloatx80(aSign, 1, aSig0);
6358         }
6359         return a;
6360     }
6361     if ( bExp == 0 ) {
6362         if ( bSig == 0 ) {
6363  invalid:
6364             float_raise(float_flag_invalid, status);
6365             return floatx80_default_nan(status);
6366         }
6367         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6368     }
6369     if ( aExp == 0 ) {
6370         if ( aSig0 == 0 ) return a;
6371         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6372     }
6373     zSign = aSign;
6374     expDiff = aExp - bExp;
6375     aSig1 = 0;
6376     if ( expDiff < 0 ) {
6377         if ( mod || expDiff < -1 ) {
6378             if (aExp == 1 && aExpOrig == 0) {
6379                 /*
6380                  * Pseudo-denormal argument must be returned in
6381                  * normalized form.
6382                  */
6383                 return packFloatx80(aSign, aExp, aSig0);
6384             }
6385             return a;
6386         }
6387         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6388         expDiff = 0;
6389     }
6390     *quotient = q = ( bSig <= aSig0 );
6391     if ( q ) aSig0 -= bSig;
6392     expDiff -= 64;
6393     while ( 0 < expDiff ) {
6394         q = estimateDiv128To64( aSig0, aSig1, bSig );
6395         q = ( 2 < q ) ? q - 2 : 0;
6396         mul64To128( bSig, q, &term0, &term1 );
6397         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6398         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6399         expDiff -= 62;
6400         *quotient <<= 62;
6401         *quotient += q;
6402     }
6403     expDiff += 64;
6404     if ( 0 < expDiff ) {
6405         q = estimateDiv128To64( aSig0, aSig1, bSig );
6406         q = ( 2 < q ) ? q - 2 : 0;
6407         q >>= 64 - expDiff;
6408         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6409         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6410         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6411         while ( le128( term0, term1, aSig0, aSig1 ) ) {
6412             ++q;
6413             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6414         }
6415         if (expDiff < 64) {
6416             *quotient <<= expDiff;
6417         } else {
6418             *quotient = 0;
6419         }
6420         *quotient += q;
6421     }
6422     else {
6423         term1 = 0;
6424         term0 = bSig;
6425     }
6426     if (!mod) {
6427         sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6428         if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6429                 || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6430                         && ( q & 1 ) )
6431             ) {
6432             aSig0 = alternateASig0;
6433             aSig1 = alternateASig1;
6434             zSign = ! zSign;
6435             ++*quotient;
6436         }
6437     }
6438     return
6439         normalizeRoundAndPackFloatx80(
6440             80, zSign, bExp + expDiff, aSig0, aSig1, status);
6441
6442 }
6443
6444 /*----------------------------------------------------------------------------
6445 | Returns the remainder of the extended double-precision floating-point value
6446 | `a' with respect to the corresponding value `b'.  The operation is performed
6447 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6448 *----------------------------------------------------------------------------*/
6449
6450 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6451 {
6452     uint64_t quotient;
6453     return floatx80_modrem(a, b, false, &quotient, status);
6454 }
6455
6456 /*----------------------------------------------------------------------------
6457 | Returns the remainder of the extended double-precision floating-point value
6458 | `a' with respect to the corresponding value `b', with the quotient truncated
6459 | toward zero.
6460 *----------------------------------------------------------------------------*/
6461
6462 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6463 {
6464     uint64_t quotient;
6465     return floatx80_modrem(a, b, true, &quotient, status);
6466 }
6467
6468 /*----------------------------------------------------------------------------
6469 | Returns the square root of the extended double-precision floating-point
6470 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6471 | for Binary Floating-Point Arithmetic.
6472 *----------------------------------------------------------------------------*/
6473
6474 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
6475 {
6476     bool aSign;
6477     int32_t aExp, zExp;
6478     uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6479     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6480
6481     if (floatx80_invalid_encoding(a)) {
6482         float_raise(float_flag_invalid, status);
6483         return floatx80_default_nan(status);
6484     }
6485     aSig0 = extractFloatx80Frac( a );
6486     aExp = extractFloatx80Exp( a );
6487     aSign = extractFloatx80Sign( a );
6488     if ( aExp == 0x7FFF ) {
6489         if ((uint64_t)(aSig0 << 1)) {
6490             return propagateFloatx80NaN(a, a, status);
6491         }
6492         if ( ! aSign ) return a;
6493         goto invalid;
6494     }
6495     if ( aSign ) {
6496         if ( ( aExp | aSig0 ) == 0 ) return a;
6497  invalid:
6498         float_raise(float_flag_invalid, status);
6499         return floatx80_default_nan(status);
6500     }
6501     if ( aExp == 0 ) {
6502         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6503         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6504     }
6505     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6506     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6507     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6508     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6509     doubleZSig0 = zSig0<<1;
6510     mul64To128( zSig0, zSig0, &term0, &term1 );
6511     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6512     while ( (int64_t) rem0 < 0 ) {
6513         --zSig0;
6514         doubleZSig0 -= 2;
6515         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6516     }
6517     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6518     if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6519         if ( zSig1 == 0 ) zSig1 = 1;
6520         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6521         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6522         mul64To128( zSig1, zSig1, &term2, &term3 );
6523         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6524         while ( (int64_t) rem1 < 0 ) {
6525             --zSig1;
6526             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6527             term3 |= 1;
6528             term2 |= doubleZSig0;
6529             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6530         }
6531         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6532     }
6533     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6534     zSig0 |= doubleZSig0;
6535     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6536                                 0, zExp, zSig0, zSig1, status);
6537 }
6538
6539 /*----------------------------------------------------------------------------
6540 | Returns the result of converting the quadruple-precision floating-point value
6541 | `a' to the 64-bit unsigned integer format.  The conversion is
6542 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6543 | Arithmetic---which means in particular that the conversion is rounded
6544 | according to the current rounding mode.  If `a' is a NaN, the largest
6545 | positive integer is returned.  If the conversion overflows, the
6546 | largest unsigned integer is returned.  If 'a' is negative, the value is
6547 | rounded and zero is returned; negative values that do not round to zero
6548 | will raise the inexact exception.
6549 *----------------------------------------------------------------------------*/
6550
6551 uint64_t float128_to_uint64(float128 a, float_status *status)
6552 {
6553     bool aSign;
6554     int aExp;
6555     int shiftCount;
6556     uint64_t aSig0, aSig1;
6557
6558     aSig0 = extractFloat128Frac0(a);
6559     aSig1 = extractFloat128Frac1(a);
6560     aExp = extractFloat128Exp(a);
6561     aSign = extractFloat128Sign(a);
6562     if (aSign && (aExp > 0x3FFE)) {
6563         float_raise(float_flag_invalid, status);
6564         if (float128_is_any_nan(a)) {
6565             return UINT64_MAX;
6566         } else {
6567             return 0;
6568         }
6569     }
6570     if (aExp) {
6571         aSig0 |= UINT64_C(0x0001000000000000);
6572     }
6573     shiftCount = 0x402F - aExp;
6574     if (shiftCount <= 0) {
6575         if (0x403E < aExp) {
6576             float_raise(float_flag_invalid, status);
6577             return UINT64_MAX;
6578         }
6579         shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6580     } else {
6581         shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6582     }
6583     return roundAndPackUint64(aSign, aSig0, aSig1, status);
6584 }
6585
6586 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6587 {
6588     uint64_t v;
6589     signed char current_rounding_mode = status->float_rounding_mode;
6590
6591     set_float_rounding_mode(float_round_to_zero, status);
6592     v = float128_to_uint64(a, status);
6593     set_float_rounding_mode(current_rounding_mode, status);
6594
6595     return v;
6596 }
6597
6598 /*----------------------------------------------------------------------------
6599 | Returns the result of converting the quadruple-precision floating-point
6600 | value `a' to the 32-bit unsigned integer format.  The conversion
6601 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6602 | Arithmetic except that the conversion is always rounded toward zero.
6603 | If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6604 | if the conversion overflows, the largest unsigned integer is returned.
6605 | If 'a' is negative, the value is rounded and zero is returned; negative
6606 | values that do not round to zero will raise the inexact exception.
6607 *----------------------------------------------------------------------------*/
6608
6609 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6610 {
6611     uint64_t v;
6612     uint32_t res;
6613     int old_exc_flags = get_float_exception_flags(status);
6614
6615     v = float128_to_uint64_round_to_zero(a, status);
6616     if (v > 0xffffffff) {
6617         res = 0xffffffff;
6618     } else {
6619         return v;
6620     }
6621     set_float_exception_flags(old_exc_flags, status);
6622     float_raise(float_flag_invalid, status);
6623     return res;
6624 }
6625
6626 /*----------------------------------------------------------------------------
6627 | Returns the result of converting the quadruple-precision floating-point value
6628 | `a' to the 32-bit unsigned integer format.  The conversion is
6629 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6630 | Arithmetic---which means in particular that the conversion is rounded
6631 | according to the current rounding mode.  If `a' is a NaN, the largest
6632 | positive integer is returned.  If the conversion overflows, the
6633 | largest unsigned integer is returned.  If 'a' is negative, the value is
6634 | rounded and zero is returned; negative values that do not round to zero
6635 | will raise the inexact exception.
6636 *----------------------------------------------------------------------------*/
6637
6638 uint32_t float128_to_uint32(float128 a, float_status *status)
6639 {
6640     uint64_t v;
6641     uint32_t res;
6642     int old_exc_flags = get_float_exception_flags(status);
6643
6644     v = float128_to_uint64(a, status);
6645     if (v > 0xffffffff) {
6646         res = 0xffffffff;
6647     } else {
6648         return v;
6649     }
6650     set_float_exception_flags(old_exc_flags, status);
6651     float_raise(float_flag_invalid, status);
6652     return res;
6653 }
6654
6655 /*----------------------------------------------------------------------------
6656 | Returns the result of converting the quadruple-precision floating-point
6657 | value `a' to the extended double-precision floating-point format.  The
6658 | conversion is performed according to the IEC/IEEE Standard for Binary
6659 | Floating-Point Arithmetic.
6660 *----------------------------------------------------------------------------*/
6661
6662 floatx80 float128_to_floatx80(float128 a, float_status *status)
6663 {
6664     bool aSign;
6665     int32_t aExp;
6666     uint64_t aSig0, aSig1;
6667
6668     aSig1 = extractFloat128Frac1( a );
6669     aSig0 = extractFloat128Frac0( a );
6670     aExp = extractFloat128Exp( a );
6671     aSign = extractFloat128Sign( a );
6672     if ( aExp == 0x7FFF ) {
6673         if ( aSig0 | aSig1 ) {
6674             floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6675                                                status);
6676             return floatx80_silence_nan(res, status);
6677         }
6678         return packFloatx80(aSign, floatx80_infinity_high,
6679                                    floatx80_infinity_low);
6680     }
6681     if ( aExp == 0 ) {
6682         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6683         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6684     }
6685     else {
6686         aSig0 |= UINT64_C(0x0001000000000000);
6687     }
6688     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6689     return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6690
6691 }
6692
6693 /*----------------------------------------------------------------------------
6694 | Returns the remainder of the quadruple-precision floating-point value `a'
6695 | with respect to the corresponding value `b'.  The operation is performed
6696 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6697 *----------------------------------------------------------------------------*/
6698
6699 float128 float128_rem(float128 a, float128 b, float_status *status)
6700 {
6701     bool aSign, zSign;
6702     int32_t aExp, bExp, expDiff;
6703     uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6704     uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6705     int64_t sigMean0;
6706
6707     aSig1 = extractFloat128Frac1( a );
6708     aSig0 = extractFloat128Frac0( a );
6709     aExp = extractFloat128Exp( a );
6710     aSign = extractFloat128Sign( a );
6711     bSig1 = extractFloat128Frac1( b );
6712     bSig0 = extractFloat128Frac0( b );
6713     bExp = extractFloat128Exp( b );
6714     if ( aExp == 0x7FFF ) {
6715         if (    ( aSig0 | aSig1 )
6716              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6717             return propagateFloat128NaN(a, b, status);
6718         }
6719         goto invalid;
6720     }
6721     if ( bExp == 0x7FFF ) {
6722         if (bSig0 | bSig1) {
6723             return propagateFloat128NaN(a, b, status);
6724         }
6725         return a;
6726     }
6727     if ( bExp == 0 ) {
6728         if ( ( bSig0 | bSig1 ) == 0 ) {
6729  invalid:
6730             float_raise(float_flag_invalid, status);
6731             return float128_default_nan(status);
6732         }
6733         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6734     }
6735     if ( aExp == 0 ) {
6736         if ( ( aSig0 | aSig1 ) == 0 ) return a;
6737         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6738     }
6739     expDiff = aExp - bExp;
6740     if ( expDiff < -1 ) return a;
6741     shortShift128Left(
6742         aSig0 | UINT64_C(0x0001000000000000),
6743         aSig1,
6744         15 - ( expDiff < 0 ),
6745         &aSig0,
6746         &aSig1
6747     );
6748     shortShift128Left(
6749         bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
6750     q = le128( bSig0, bSig1, aSig0, aSig1 );
6751     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6752     expDiff -= 64;
6753     while ( 0 < expDiff ) {
6754         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6755         q = ( 4 < q ) ? q - 4 : 0;
6756         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6757         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6758         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6759         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6760         expDiff -= 61;
6761     }
6762     if ( -64 < expDiff ) {
6763         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6764         q = ( 4 < q ) ? q - 4 : 0;
6765         q >>= - expDiff;
6766         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6767         expDiff += 52;
6768         if ( expDiff < 0 ) {
6769             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6770         }
6771         else {
6772             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6773         }
6774         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6775         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6776     }
6777     else {
6778         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6779         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6780     }
6781     do {
6782         alternateASig0 = aSig0;
6783         alternateASig1 = aSig1;
6784         ++q;
6785         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6786     } while ( 0 <= (int64_t) aSig0 );
6787     add128(
6788         aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6789     if (    ( sigMean0 < 0 )
6790          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6791         aSig0 = alternateASig0;
6792         aSig1 = alternateASig1;
6793     }
6794     zSign = ( (int64_t) aSig0 < 0 );
6795     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6796     return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6797                                          status);
6798 }
6799
6800 /*----------------------------------------------------------------------------
6801 | Returns the square root of the quadruple-precision floating-point value `a'.
6802 | The operation is performed according to the IEC/IEEE Standard for Binary
6803 | Floating-Point Arithmetic.
6804 *----------------------------------------------------------------------------*/
6805
6806 float128 float128_sqrt(float128 a, float_status *status)
6807 {
6808     bool aSign;
6809     int32_t aExp, zExp;
6810     uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6811     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6812
6813     aSig1 = extractFloat128Frac1( a );
6814     aSig0 = extractFloat128Frac0( a );
6815     aExp = extractFloat128Exp( a );
6816     aSign = extractFloat128Sign( a );
6817     if ( aExp == 0x7FFF ) {
6818         if (aSig0 | aSig1) {
6819             return propagateFloat128NaN(a, a, status);
6820         }
6821         if ( ! aSign ) return a;
6822         goto invalid;
6823     }
6824     if ( aSign ) {
6825         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6826  invalid:
6827         float_raise(float_flag_invalid, status);
6828         return float128_default_nan(status);
6829     }
6830     if ( aExp == 0 ) {
6831         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6832         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6833     }
6834     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6835     aSig0 |= UINT64_C(0x0001000000000000);
6836     zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6837     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6838     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6839     doubleZSig0 = zSig0<<1;
6840     mul64To128( zSig0, zSig0, &term0, &term1 );
6841     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6842     while ( (int64_t) rem0 < 0 ) {
6843         --zSig0;
6844         doubleZSig0 -= 2;
6845         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6846     }
6847     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6848     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6849         if ( zSig1 == 0 ) zSig1 = 1;
6850         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6851         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6852         mul64To128( zSig1, zSig1, &term2, &term3 );
6853         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6854         while ( (int64_t) rem1 < 0 ) {
6855             --zSig1;
6856             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6857             term3 |= 1;
6858             term2 |= doubleZSig0;
6859             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6860         }
6861         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6862     }
6863     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6864     return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
6865
6866 }
6867
6868 static inline FloatRelation
6869 floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6870                           float_status *status)
6871 {
6872     bool aSign, bSign;
6873
6874     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6875         float_raise(float_flag_invalid, status);
6876         return float_relation_unordered;
6877     }
6878     if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6879           ( extractFloatx80Frac( a )<<1 ) ) ||
6880         ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6881           ( extractFloatx80Frac( b )<<1 ) )) {
6882         if (!is_quiet ||
6883             floatx80_is_signaling_nan(a, status) ||
6884             floatx80_is_signaling_nan(b, status)) {
6885             float_raise(float_flag_invalid, status);
6886         }
6887         return float_relation_unordered;
6888     }
6889     aSign = extractFloatx80Sign( a );
6890     bSign = extractFloatx80Sign( b );
6891     if ( aSign != bSign ) {
6892
6893         if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6894              ( ( a.low | b.low ) == 0 ) ) {
6895             /* zero case */
6896             return float_relation_equal;
6897         } else {
6898             return 1 - (2 * aSign);
6899         }
6900     } else {
6901         /* Normalize pseudo-denormals before comparison.  */
6902         if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6903             ++a.high;
6904         }
6905         if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6906             ++b.high;
6907         }
6908         if (a.low == b.low && a.high == b.high) {
6909             return float_relation_equal;
6910         } else {
6911             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6912         }
6913     }
6914 }
6915
6916 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6917 {
6918     return floatx80_compare_internal(a, b, 0, status);
6919 }
6920
6921 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6922                                      float_status *status)
6923 {
6924     return floatx80_compare_internal(a, b, 1, status);
6925 }
6926
6927 static inline FloatRelation
6928 float128_compare_internal(float128 a, float128 b, bool is_quiet,
6929                           float_status *status)
6930 {
6931     bool aSign, bSign;
6932
6933     if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6934           ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6935         ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6936           ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6937         if (!is_quiet ||
6938             float128_is_signaling_nan(a, status) ||
6939             float128_is_signaling_nan(b, status)) {
6940             float_raise(float_flag_invalid, status);
6941         }
6942         return float_relation_unordered;
6943     }
6944     aSign = extractFloat128Sign( a );
6945     bSign = extractFloat128Sign( b );
6946     if ( aSign != bSign ) {
6947         if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6948             /* zero case */
6949             return float_relation_equal;
6950         } else {
6951             return 1 - (2 * aSign);
6952         }
6953     } else {
6954         if (a.low == b.low && a.high == b.high) {
6955             return float_relation_equal;
6956         } else {
6957             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6958         }
6959     }
6960 }
6961
6962 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
6963 {
6964     return float128_compare_internal(a, b, 0, status);
6965 }
6966
6967 FloatRelation float128_compare_quiet(float128 a, float128 b,
6968                                      float_status *status)
6969 {
6970     return float128_compare_internal(a, b, 1, status);
6971 }
6972
6973 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6974 {
6975     bool aSign;
6976     int32_t aExp;
6977     uint64_t aSig;
6978
6979     if (floatx80_invalid_encoding(a)) {
6980         float_raise(float_flag_invalid, status);
6981         return floatx80_default_nan(status);
6982     }
6983     aSig = extractFloatx80Frac( a );
6984     aExp = extractFloatx80Exp( a );
6985     aSign = extractFloatx80Sign( a );
6986
6987     if ( aExp == 0x7FFF ) {
6988         if ( aSig<<1 ) {
6989             return propagateFloatx80NaN(a, a, status);
6990         }
6991         return a;
6992     }
6993
6994     if (aExp == 0) {
6995         if (aSig == 0) {
6996             return a;
6997         }
6998         aExp++;
6999     }
7000
7001     if (n > 0x10000) {
7002         n = 0x10000;
7003     } else if (n < -0x10000) {
7004         n = -0x10000;
7005     }
7006
7007     aExp += n;
7008     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7009                                          aSign, aExp, aSig, 0, status);
7010 }
7011
7012 float128 float128_scalbn(float128 a, int n, float_status *status)
7013 {
7014     bool aSign;
7015     int32_t aExp;
7016     uint64_t aSig0, aSig1;
7017
7018     aSig1 = extractFloat128Frac1( a );
7019     aSig0 = extractFloat128Frac0( a );
7020     aExp = extractFloat128Exp( a );
7021     aSign = extractFloat128Sign( a );
7022     if ( aExp == 0x7FFF ) {
7023         if ( aSig0 | aSig1 ) {
7024             return propagateFloat128NaN(a, a, status);
7025         }
7026         return a;
7027     }
7028     if (aExp != 0) {
7029         aSig0 |= UINT64_C(0x0001000000000000);
7030     } else if (aSig0 == 0 && aSig1 == 0) {
7031         return a;
7032     } else {
7033         aExp++;
7034     }
7035
7036     if (n > 0x10000) {
7037         n = 0x10000;
7038     } else if (n < -0x10000) {
7039         n = -0x10000;
7040     }
7041
7042     aExp += n - 1;
7043     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7044                                          , status);
7045
7046 }
7047
7048 static void __attribute__((constructor)) softfloat_init(void)
7049 {
7050     union_float64 ua, ub, uc, ur;
7051
7052     if (QEMU_NO_HARDFLOAT) {
7053         return;
7054     }
7055     /*
7056      * Test that the host's FMA is not obviously broken. For example,
7057      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7058      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7059      */
7060     ua.s = 0x0020000000000001ULL;
7061     ub.s = 0x3ca0000000000000ULL;
7062     uc.s = 0x0020000000000000ULL;
7063     ur.h = fma(ua.h, ub.h, uc.h);
7064     if (ur.s != 0x0020000000000001ULL) {
7065         force_soft_fma = true;
7066     }
7067 }