Merge tag 'qemu-macppc-20230206' of https://github.com/mcayland/qemu into staging
[qemu.git] / fpu / softfloat.c
blobc7454c3eb1a17e5348f6b63584f5558124725276
1 /*
2 * QEMU float support
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is not fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 *
 * QEMU_NO_HARDFLOAT is 1 when hardfloat must not be used (PPC targets, or a
 * -ffast-math build) and 0 otherwise; QEMU_SOFTFLOAT_ATTR additionally marks
 * the softfloat functions noinline only when hardfloat is enabled.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status *s)
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
256 typedef union {
257 float32 s;
258 float h;
259 } union_float32;
261 typedef union {
262 float64 s;
263 double h;
264 } union_float64;
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float (*hard_f32_op2_fn)(float a, float b);
272 typedef double (*hard_f64_op2_fn)(double a, double b);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
326 static inline bool f32_is_inf(union_float32 a)
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
331 return float32_is_infinity(a.s);
334 static inline bool f64_is_inf(union_float64 a)
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
339 return float64_is_infinity(a.s);
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345 f32_check_fn pre, f32_check_fn post)
347 union_float32 ua, ub, ur;
349 ua.s = xa;
350 ub.s = xb;
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
363 float_raise(float_flag_overflow, s);
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
367 return ur.s;
369 soft:
370 return soft(ua.s, ub.s, s);
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376 f64_check_fn pre, f64_check_fn post)
378 union_float64 ua, ub, ur;
380 ua.s = xa;
381 ub.s = xb;
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
394 float_raise(float_flag_overflow, s);
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
398 return ur.s;
400 soft:
401 return soft(ua.s, ub.s, s);
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
418 #define float_cmask(bit) (1u << (bit))
420 enum {
421 float_cmask_zero = float_cmask(float_class_zero),
422 float_cmask_normal = float_cmask(float_class_normal),
423 float_cmask_inf = float_cmask(float_class_inf),
424 float_cmask_qnan = float_cmask(float_class_qnan),
425 float_cmask_snan = float_cmask(float_class_snan),
427 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
446 /* Simple helpers for checking if, or what kind of, NaN we have */
447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
449 return unlikely(c >= float_class_qnan);
452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
454 return c == float_class_snan;
457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
459 return c == float_class_qnan;
463 * Structure holding all of the decomposed parts of a float.
464 * The exponent is unbiased and the fraction is normalized.
466 * The fraction words are stored in big-endian word ordering,
467 * so that truncation from a larger format to a smaller format
468 * can be done simply by ignoring subsequent elements.
471 typedef struct {
472 FloatClass cls;
473 bool sign;
474 int32_t exp;
475 union {
476 /* Routines that know the structure may reference the singular name. */
477 uint64_t frac;
479 * Routines expanded with multiple structures reference "hi" and "lo"
480 * depending on the operation. In FloatParts64, "hi" and "lo" are
481 * both the same word and aliased here.
483 uint64_t frac_hi;
484 uint64_t frac_lo;
486 } FloatParts64;
488 typedef struct {
489 FloatClass cls;
490 bool sign;
491 int32_t exp;
492 uint64_t frac_hi;
493 uint64_t frac_lo;
494 } FloatParts128;
496 typedef struct {
497 FloatClass cls;
498 bool sign;
499 int32_t exp;
500 uint64_t frac_hi;
501 uint64_t frac_hm; /* high-middle */
502 uint64_t frac_lm; /* low-middle */
503 uint64_t frac_lo;
504 } FloatParts256;
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_re_bias: exponent re-bias constant; FLOAT_PARAMS_ sets it to
 *                3 << (exp_size - 2).  NOTE(review): its consumers are not
 *                in this part of the file - confirm the intended use there.
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;
/*
 * Expand fields based on the size of exponent and fraction.
 *
 * FLOAT_PARAMS_(E) yields the exponent-related designated initializers for
 * an E-bit exponent; FLOAT_PARAMS(E, F) adds the fraction-related ones for
 * an F-bit fraction.  Parameters are parenthesized so that expression
 * arguments (e.g. "20 + 3") expand correctly, in particular under the unary
 * minus used for frac_shift and round_mask.
 */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = (E),                              \
    .exp_bias       = ((1 << (E)) - 1) >> 1,            \
    .exp_re_bias    = (1 << ((E) - 1)) + (1 << ((E) - 2)), \
    .exp_max        = (1 << (E)) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = (F),                              \
    .frac_shift     = (-(F) - 1) & 63,                  \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1
545 static const FloatFmt float16_params = {
546 FLOAT_PARAMS(5, 10)
549 static const FloatFmt float16_params_ahp = {
550 FLOAT_PARAMS(5, 10),
551 .arm_althp = true
554 static const FloatFmt bfloat16_params = {
555 FLOAT_PARAMS(8, 7)
558 static const FloatFmt float32_params = {
559 FLOAT_PARAMS(8, 23)
562 static const FloatFmt float64_params = {
563 FLOAT_PARAMS(11, 52)
566 static const FloatFmt float128_params = {
567 FLOAT_PARAMS(15, 112)
570 #define FLOATX80_PARAMS(R) \
571 FLOAT_PARAMS_(15), \
572 .frac_size = R == 64 ? 63 : R, \
573 .frac_shift = 0, \
574 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
576 static const FloatFmt floatx80_params[3] = {
577 [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
578 [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
579 [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
582 /* Unpack a float to parts, but do not canonicalize. */
583 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
585 const int f_size = fmt->frac_size;
586 const int e_size = fmt->exp_size;
588 *r = (FloatParts64) {
589 .cls = float_class_unclassified,
590 .sign = extract64(raw, f_size + e_size, 1),
591 .exp = extract64(raw, f_size, e_size),
592 .frac = extract64(raw, 0, f_size)
596 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
598 unpack_raw64(p, &float16_params, f);
601 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
603 unpack_raw64(p, &bfloat16_params, f);
606 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
608 unpack_raw64(p, &float32_params, f);
611 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
613 unpack_raw64(p, &float64_params, f);
616 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
618 *p = (FloatParts128) {
619 .cls = float_class_unclassified,
620 .sign = extract32(f.high, 15, 1),
621 .exp = extract32(f.high, 0, 15),
622 .frac_hi = f.low
626 static void float128_unpack_raw(FloatParts128 *p, float128 f)
628 const int f_size = float128_params.frac_size - 64;
629 const int e_size = float128_params.exp_size;
631 *p = (FloatParts128) {
632 .cls = float_class_unclassified,
633 .sign = extract64(f.high, f_size + e_size, 1),
634 .exp = extract64(f.high, f_size, e_size),
635 .frac_hi = extract64(f.high, 0, f_size),
636 .frac_lo = f.low,
640 /* Pack a float from parts, but do not canonicalize. */
641 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
643 const int f_size = fmt->frac_size;
644 const int e_size = fmt->exp_size;
645 uint64_t ret;
647 ret = (uint64_t)p->sign << (f_size + e_size);
648 ret = deposit64(ret, f_size, e_size, p->exp);
649 ret = deposit64(ret, 0, f_size, p->frac);
650 return ret;
653 static inline float16 float16_pack_raw(const FloatParts64 *p)
655 return make_float16(pack_raw64(p, &float16_params));
658 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
660 return pack_raw64(p, &bfloat16_params);
663 static inline float32 float32_pack_raw(const FloatParts64 *p)
665 return make_float32(pack_raw64(p, &float32_params));
668 static inline float64 float64_pack_raw(const FloatParts64 *p)
670 return make_float64(pack_raw64(p, &float64_params));
673 static float128 float128_pack_raw(const FloatParts128 *p)
675 const int f_size = float128_params.frac_size - 64;
676 const int e_size = float128_params.exp_size;
677 uint64_t hi;
679 hi = (uint64_t)p->sign << (f_size + e_size);
680 hi = deposit64(hi, f_size, e_size, p->exp);
681 hi = deposit64(hi, 0, f_size, p->frac_hi);
682 return make_float128(hi, p->frac_lo);
685 /*----------------------------------------------------------------------------
686 | Functions and definitions to determine: (1) whether tininess for underflow
687 | is detected before or after rounding by default, (2) what (if anything)
688 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
689 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
690 | are propagated from function inputs to output. These details are target-
691 | specific.
692 *----------------------------------------------------------------------------*/
693 #include "softfloat-specialize.c.inc"
/*
 * Select parts64_NAME / parts128_NAME (/ parts256_NAME) according to the
 * pointer type of P.  The result is the selected function designator, to be
 * called by the user of the macro.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)
704 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
705 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
707 static void parts64_return_nan(FloatParts64 *a, float_status *s);
708 static void parts128_return_nan(FloatParts128 *a, float_status *s);
710 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
712 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
713 float_status *s);
714 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
715 float_status *s);
717 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
719 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
720 FloatParts64 *c, float_status *s,
721 int ab_mask, int abc_mask);
722 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
723 FloatParts128 *b,
724 FloatParts128 *c,
725 float_status *s,
726 int ab_mask, int abc_mask);
728 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
729 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
731 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
732 const FloatFmt *fmt);
733 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
734 const FloatFmt *fmt);
736 #define parts_canonicalize(A, S, F) \
737 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
739 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
740 const FloatFmt *fmt);
741 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
742 const FloatFmt *fmt);
744 #define parts_uncanon_normal(A, S, F) \
745 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
747 static void parts64_uncanon(FloatParts64 *p, float_status *status,
748 const FloatFmt *fmt);
749 static void parts128_uncanon(FloatParts128 *p, float_status *status,
750 const FloatFmt *fmt);
752 #define parts_uncanon(A, S, F) \
753 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
755 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
756 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
757 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
759 #define parts_add_normal(A, B) \
760 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
762 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
763 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
764 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
766 #define parts_sub_normal(A, B) \
767 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
769 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
770 float_status *s, bool subtract);
771 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
772 float_status *s, bool subtract);
774 #define parts_addsub(A, B, S, Z) \
775 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
777 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
778 float_status *s);
779 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
780 float_status *s);
782 #define parts_mul(A, B, S) \
783 PARTS_GENERIC_64_128(mul, A)(A, B, S)
785 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
786 FloatParts64 *c, int flags,
787 float_status *s);
788 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
789 FloatParts128 *c, int flags,
790 float_status *s);
792 #define parts_muladd(A, B, C, Z, S) \
793 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
795 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
796 float_status *s);
797 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
798 float_status *s);
800 #define parts_div(A, B, S) \
801 PARTS_GENERIC_64_128(div, A)(A, B, S)
803 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
804 uint64_t *mod_quot, float_status *s);
805 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
806 uint64_t *mod_quot, float_status *s);
808 #define parts_modrem(A, B, Q, S) \
809 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
811 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
812 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
814 #define parts_sqrt(A, S, F) \
815 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
817 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
818 int scale, int frac_size);
819 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
820 int scale, int frac_size);
822 #define parts_round_to_int_normal(A, R, C, F) \
823 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
825 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
826 int scale, float_status *s,
827 const FloatFmt *fmt);
828 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
829 int scale, float_status *s,
830 const FloatFmt *fmt);
832 #define parts_round_to_int(A, R, C, S, F) \
833 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
835 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
836 int scale, int64_t min, int64_t max,
837 float_status *s);
838 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
839 int scale, int64_t min, int64_t max,
840 float_status *s);
842 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
843 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
845 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
846 int scale, uint64_t max,
847 float_status *s);
848 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
849 int scale, uint64_t max,
850 float_status *s);
852 #define parts_float_to_uint(P, R, Z, M, S) \
853 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
855 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
856 int scale, float_status *s);
857 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
858 int scale, float_status *s);
860 #define parts_sint_to_float(P, I, Z, S) \
861 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
863 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
864 int scale, float_status *s);
865 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
866 int scale, float_status *s);
868 #define parts_uint_to_float(P, I, Z, S) \
869 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
871 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
872 float_status *s, int flags);
873 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
874 float_status *s, int flags);
876 #define parts_minmax(A, B, S, F) \
877 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
879 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
880 float_status *s, bool q);
881 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
882 float_status *s, bool q);
884 #define parts_compare(A, B, S, Q) \
885 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
887 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
888 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
890 #define parts_scalbn(A, N, S) \
891 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
893 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
894 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
896 #define parts_log2(A, S, F) \
897 PARTS_GENERIC_64_128(log2, A)(A, S, F)
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 *
 * The FRAC_GENERIC_* macros select frac64_NAME / frac128_NAME
 * (/ frac256_NAME) according to the pointer type of P.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)
912 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
914 return uadd64_overflow(a->frac, b->frac, &r->frac);
917 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
919 bool c = 0;
920 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
921 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
922 return c;
925 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
927 bool c = 0;
928 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
929 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
930 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
931 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
932 return c;
935 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
937 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
939 return uadd64_overflow(a->frac, c, &r->frac);
942 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
944 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
945 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
948 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
950 static void frac64_allones(FloatParts64 *a)
952 a->frac = -1;
955 static void frac128_allones(FloatParts128 *a)
957 a->frac_hi = a->frac_lo = -1;
960 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
962 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
964 return (a->frac == b->frac ? float_relation_equal
965 : a->frac < b->frac ? float_relation_less
966 : float_relation_greater);
969 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
971 uint64_t ta = a->frac_hi, tb = b->frac_hi;
972 if (ta == tb) {
973 ta = a->frac_lo, tb = b->frac_lo;
974 if (ta == tb) {
975 return float_relation_equal;
978 return ta < tb ? float_relation_less : float_relation_greater;
981 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
983 static void frac64_clear(FloatParts64 *a)
985 a->frac = 0;
988 static void frac128_clear(FloatParts128 *a)
990 a->frac_hi = a->frac_lo = 0;
993 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
995 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
997 uint64_t n1, n0, r, q;
998 bool ret;
1001 * We want a 2*N / N-bit division to produce exactly an N-bit
1002 * result, so that we do not lose any precision and so that we
1003 * do not have to renormalize afterward. If A.frac < B.frac,
1004 * then division would produce an (N-1)-bit result; shift A left
1005 * by one to produce the an N-bit result, and return true to
1006 * decrement the exponent to match.
1008 * The udiv_qrnnd algorithm that we're using requires normalization,
1009 * i.e. the msb of the denominator must be set, which is already true.
1011 ret = a->frac < b->frac;
1012 if (ret) {
1013 n0 = a->frac;
1014 n1 = 0;
1015 } else {
1016 n0 = a->frac >> 1;
1017 n1 = a->frac << 63;
1019 q = udiv_qrnnd(&r, n0, n1, b->frac);
1021 /* Set lsb if there is a remainder, to set inexact. */
1022 a->frac = q | (r != 0);
1024 return ret;
1027 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1029 uint64_t q0, q1, a0, a1, b0, b1;
1030 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1031 bool ret = false;
1033 a0 = a->frac_hi, a1 = a->frac_lo;
1034 b0 = b->frac_hi, b1 = b->frac_lo;
1036 ret = lt128(a0, a1, b0, b1);
1037 if (!ret) {
1038 a1 = shr_double(a0, a1, 1);
1039 a0 = a0 >> 1;
1042 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1043 q0 = estimateDiv128To64(a0, a1, b0);
1046 * Estimate is high because B1 was not included (unless B1 == 0).
1047 * Reduce quotient and increase remainder until remainder is non-negative.
1048 * This loop will execute 0 to 2 times.
1050 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1051 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1052 while (r0 != 0) {
1053 q0--;
1054 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1057 /* Repeat using the remainder, producing a second word of quotient. */
1058 q1 = estimateDiv128To64(r1, r2, b0);
1059 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1060 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1061 while (r1 != 0) {
1062 q1--;
1063 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1066 /* Any remainder indicates inexact; set sticky bit. */
1067 q1 |= (r2 | r3) != 0;
1069 a->frac_hi = q0;
1070 a->frac_lo = q1;
1071 return ret;
1074 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1076 static bool frac64_eqz(FloatParts64 *a)
1078 return a->frac == 0;
1081 static bool frac128_eqz(FloatParts128 *a)
1083 return (a->frac_hi | a->frac_lo) == 0;
1086 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1088 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1090 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1093 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1095 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1096 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1099 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1101 static void frac64_neg(FloatParts64 *a)
1103 a->frac = -a->frac;
1106 static void frac128_neg(FloatParts128 *a)
1108 bool c = 0;
1109 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1110 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1113 static void frac256_neg(FloatParts256 *a)
1115 bool c = 0;
1116 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1117 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1118 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1119 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1122 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1124 static int frac64_normalize(FloatParts64 *a)
1126 if (a->frac) {
1127 int shift = clz64(a->frac);
1128 a->frac <<= shift;
1129 return shift;
1131 return 64;
1134 static int frac128_normalize(FloatParts128 *a)
1136 if (a->frac_hi) {
1137 int shl = clz64(a->frac_hi);
1138 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1139 a->frac_lo <<= shl;
1140 return shl;
1141 } else if (a->frac_lo) {
1142 int shl = clz64(a->frac_lo);
1143 a->frac_hi = a->frac_lo << shl;
1144 a->frac_lo = 0;
1145 return shl + 64;
1147 return 128;
1150 static int frac256_normalize(FloatParts256 *a)
1152 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1153 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1154 int ret, shl;
1156 if (likely(a0)) {
1157 shl = clz64(a0);
1158 if (shl == 0) {
1159 return 0;
1161 ret = shl;
1162 } else {
1163 if (a1) {
1164 ret = 64;
1165 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1166 } else if (a2) {
1167 ret = 128;
1168 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1169 } else if (a3) {
1170 ret = 192;
1171 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1172 } else {
1173 ret = 256;
1174 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1175 goto done;
1177 shl = clz64(a0);
1178 if (shl == 0) {
1179 goto done;
1181 ret += shl;
1184 a0 = shl_double(a0, a1, shl);
1185 a1 = shl_double(a1, a2, shl);
1186 a2 = shl_double(a2, a3, shl);
1187 a3 <<= shl;
1189 done:
1190 a->frac_hi = a0;
1191 a->frac_hm = a1;
1192 a->frac_lm = a2;
1193 a->frac_lo = a3;
1194 return ret;
1197 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1199 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1201 uint64_t a0, a1, b0, t0, t1, q, quot;
1202 int exp_diff = a->exp - b->exp;
1203 int shift;
1205 a0 = a->frac;
1206 a1 = 0;
1208 if (exp_diff < -1) {
1209 if (mod_quot) {
1210 *mod_quot = 0;
1212 return;
1214 if (exp_diff == -1) {
1215 a0 >>= 1;
1216 exp_diff = 0;
1219 b0 = b->frac;
1220 quot = q = b0 <= a0;
1221 if (q) {
1222 a0 -= b0;
1225 exp_diff -= 64;
1226 while (exp_diff > 0) {
1227 q = estimateDiv128To64(a0, a1, b0);
1228 q = q > 2 ? q - 2 : 0;
1229 mul64To128(b0, q, &t0, &t1);
1230 sub128(a0, a1, t0, t1, &a0, &a1);
1231 shortShift128Left(a0, a1, 62, &a0, &a1);
1232 exp_diff -= 62;
1233 quot = (quot << 62) + q;
1236 exp_diff += 64;
1237 if (exp_diff > 0) {
1238 q = estimateDiv128To64(a0, a1, b0);
1239 q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1240 mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1241 sub128(a0, a1, t0, t1, &a0, &a1);
1242 shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1243 while (le128(t0, t1, a0, a1)) {
1244 ++q;
1245 sub128(a0, a1, t0, t1, &a0, &a1);
1247 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1248 } else {
1249 t0 = b0;
1250 t1 = 0;
1253 if (mod_quot) {
1254 *mod_quot = quot;
1255 } else {
1256 sub128(t0, t1, a0, a1, &t0, &t1);
1257 if (lt128(t0, t1, a0, a1) ||
1258 (eq128(t0, t1, a0, a1) && (q & 1))) {
1259 a0 = t0;
1260 a1 = t1;
1261 a->sign = !a->sign;
1265 if (likely(a0)) {
1266 shift = clz64(a0);
1267 shortShift128Left(a0, a1, shift, &a0, &a1);
1268 } else if (likely(a1)) {
1269 shift = clz64(a1);
1270 a0 = a1 << shift;
1271 a1 = 0;
1272 shift += 64;
1273 } else {
1274 a->cls = float_class_zero;
1275 return;
1278 a->exp = b->exp + exp_diff - shift;
1279 a->frac = a0 | (a1 != 0);
1282 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1283 uint64_t *mod_quot)
1285 uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1286 int exp_diff = a->exp - b->exp;
1287 int shift;
1289 a0 = a->frac_hi;
1290 a1 = a->frac_lo;
1291 a2 = 0;
1293 if (exp_diff < -1) {
1294 if (mod_quot) {
1295 *mod_quot = 0;
1297 return;
1299 if (exp_diff == -1) {
1300 shift128Right(a0, a1, 1, &a0, &a1);
1301 exp_diff = 0;
1304 b0 = b->frac_hi;
1305 b1 = b->frac_lo;
1307 quot = q = le128(b0, b1, a0, a1);
1308 if (q) {
1309 sub128(a0, a1, b0, b1, &a0, &a1);
1312 exp_diff -= 64;
1313 while (exp_diff > 0) {
1314 q = estimateDiv128To64(a0, a1, b0);
1315 q = q > 4 ? q - 4 : 0;
1316 mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1317 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1318 shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1319 exp_diff -= 61;
1320 quot = (quot << 61) + q;
1323 exp_diff += 64;
1324 if (exp_diff > 0) {
1325 q = estimateDiv128To64(a0, a1, b0);
1326 q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1327 mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1328 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1329 shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1330 while (le192(t0, t1, t2, a0, a1, a2)) {
1331 ++q;
1332 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1334 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1335 } else {
1336 t0 = b0;
1337 t1 = b1;
1338 t2 = 0;
1341 if (mod_quot) {
1342 *mod_quot = quot;
1343 } else {
1344 sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1345 if (lt192(t0, t1, t2, a0, a1, a2) ||
1346 (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1347 a0 = t0;
1348 a1 = t1;
1349 a2 = t2;
1350 a->sign = !a->sign;
1354 if (likely(a0)) {
1355 shift = clz64(a0);
1356 shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1357 } else if (likely(a1)) {
1358 shift = clz64(a1);
1359 shortShift128Left(a1, a2, shift, &a0, &a1);
1360 a2 = 0;
1361 shift += 64;
1362 } else if (likely(a2)) {
1363 shift = clz64(a2);
1364 a0 = a2 << shift;
1365 a1 = a2 = 0;
1366 shift += 128;
1367 } else {
1368 a->cls = float_class_zero;
1369 return;
1372 a->exp = b->exp + exp_diff - shift;
1373 a->frac_hi = a0;
1374 a->frac_lo = a1 | (a2 != 0);
1377 #define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1379 static void frac64_shl(FloatParts64 *a, int c)
1381 a->frac <<= c;
1384 static void frac128_shl(FloatParts128 *a, int c)
1386 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1388 if (c & 64) {
1389 a0 = a1, a1 = 0;
1392 c &= 63;
1393 if (c) {
1394 a0 = shl_double(a0, a1, c);
1395 a1 = a1 << c;
1398 a->frac_hi = a0;
1399 a->frac_lo = a1;
1402 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1404 static void frac64_shr(FloatParts64 *a, int c)
1406 a->frac >>= c;
1409 static void frac128_shr(FloatParts128 *a, int c)
1411 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1413 if (c & 64) {
1414 a1 = a0, a0 = 0;
1417 c &= 63;
1418 if (c) {
1419 a1 = shr_double(a0, a1, c);
1420 a0 = a0 >> c;
1423 a->frac_hi = a0;
1424 a->frac_lo = a1;
1427 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1429 static void frac64_shrjam(FloatParts64 *a, int c)
1431 uint64_t a0 = a->frac;
1433 if (likely(c != 0)) {
1434 if (likely(c < 64)) {
1435 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1436 } else {
1437 a0 = a0 != 0;
1439 a->frac = a0;
1443 static void frac128_shrjam(FloatParts128 *a, int c)
1445 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1446 uint64_t sticky = 0;
1448 if (unlikely(c == 0)) {
1449 return;
1450 } else if (likely(c < 64)) {
1451 /* nothing */
1452 } else if (likely(c < 128)) {
1453 sticky = a1;
1454 a1 = a0;
1455 a0 = 0;
1456 c &= 63;
1457 if (c == 0) {
1458 goto done;
1460 } else {
1461 sticky = a0 | a1;
1462 a0 = a1 = 0;
1463 goto done;
1466 sticky |= shr_double(a1, 0, c);
1467 a1 = shr_double(a0, a1, c);
1468 a0 = a0 >> c;
1470 done:
1471 a->frac_lo = a1 | (sticky != 0);
1472 a->frac_hi = a0;
1475 static void frac256_shrjam(FloatParts256 *a, int c)
1477 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1478 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1479 uint64_t sticky = 0;
1481 if (unlikely(c == 0)) {
1482 return;
1483 } else if (likely(c < 64)) {
1484 /* nothing */
1485 } else if (likely(c < 256)) {
1486 if (unlikely(c & 128)) {
1487 sticky |= a2 | a3;
1488 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1490 if (unlikely(c & 64)) {
1491 sticky |= a3;
1492 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1494 c &= 63;
1495 if (c == 0) {
1496 goto done;
1498 } else {
1499 sticky = a0 | a1 | a2 | a3;
1500 a0 = a1 = a2 = a3 = 0;
1501 goto done;
1504 sticky |= shr_double(a3, 0, c);
1505 a3 = shr_double(a2, a3, c);
1506 a2 = shr_double(a1, a2, c);
1507 a1 = shr_double(a0, a1, c);
1508 a0 = a0 >> c;
1510 done:
1511 a->frac_lo = a3 | (sticky != 0);
1512 a->frac_lm = a2;
1513 a->frac_hm = a1;
1514 a->frac_hi = a0;
1517 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1519 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1521 return usub64_overflow(a->frac, b->frac, &r->frac);
1524 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1526 bool c = 0;
1527 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1528 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1529 return c;
1532 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1534 bool c = 0;
1535 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1536 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1537 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1538 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1539 return c;
1542 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1544 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1546 r->frac = a->frac_hi | (a->frac_lo != 0);
1549 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1551 r->frac_hi = a->frac_hi;
1552 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1555 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1557 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1559 r->frac_hi = a->frac;
1560 r->frac_lo = 0;
1563 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1565 r->frac_hi = a->frac_hi;
1566 r->frac_hm = a->frac_lo;
1567 r->frac_lm = 0;
1568 r->frac_lo = 0;
1571 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    /* [  0..  7] */
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    /* [  8.. 15] */
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    /* [ 16.. 23] */
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    /* [ 24.. 31] */
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    /* [ 32.. 39] */
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    /* [ 40.. 47] */
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    /* [ 48.. 55] */
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    /* [ 56.. 63] */
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    /* [ 64.. 71] */
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    /* [ 72.. 79] */
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    /* [ 80.. 87] */
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    /* [ 88.. 95] */
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    /* [ 96..103] */
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    /* [104..111] */
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    /* [112..119] */
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    /* [120..127] */
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1597 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1598 #define FloatPartsN glue(FloatParts,N)
1599 #define FloatPartsW glue(FloatParts,W)
1601 #define N 64
1602 #define W 128
1604 #include "softfloat-parts-addsub.c.inc"
1605 #include "softfloat-parts.c.inc"
1607 #undef N
1608 #undef W
1609 #define N 128
1610 #define W 256
1612 #include "softfloat-parts-addsub.c.inc"
1613 #include "softfloat-parts.c.inc"
1615 #undef N
1616 #undef W
1617 #define N 256
1619 #include "softfloat-parts-addsub.c.inc"
1621 #undef N
1622 #undef W
1623 #undef partsN
1624 #undef FloatPartsN
1625 #undef FloatPartsW
1628 * Pack/unpack routines with a specific FloatFmt.
1631 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1632 float_status *s, const FloatFmt *params)
1634 float16_unpack_raw(p, f);
1635 parts_canonicalize(p, s, params);
1638 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1639 float_status *s)
1641 float16a_unpack_canonical(p, f, s, &float16_params);
1644 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1645 float_status *s)
1647 bfloat16_unpack_raw(p, f);
1648 parts_canonicalize(p, s, &bfloat16_params);
1651 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1652 float_status *s,
1653 const FloatFmt *params)
1655 parts_uncanon(p, s, params);
1656 return float16_pack_raw(p);
1659 static float16 float16_round_pack_canonical(FloatParts64 *p,
1660 float_status *s)
1662 return float16a_round_pack_canonical(p, s, &float16_params);
1665 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1666 float_status *s)
1668 parts_uncanon(p, s, &bfloat16_params);
1669 return bfloat16_pack_raw(p);
1672 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1673 float_status *s)
1675 float32_unpack_raw(p, f);
1676 parts_canonicalize(p, s, &float32_params);
1679 static float32 float32_round_pack_canonical(FloatParts64 *p,
1680 float_status *s)
1682 parts_uncanon(p, s, &float32_params);
1683 return float32_pack_raw(p);
1686 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1687 float_status *s)
1689 float64_unpack_raw(p, f);
1690 parts_canonicalize(p, s, &float64_params);
1693 static float64 float64_round_pack_canonical(FloatParts64 *p,
1694 float_status *s)
1696 parts_uncanon(p, s, &float64_params);
1697 return float64_pack_raw(p);
1700 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1701 float_status *s)
1703 parts_uncanon(p, s, &float32_params);
1706 * In parts_uncanon, we placed the fraction for float32 at the lsb.
1707 * We need to adjust the fraction higher so that the least N bits are
1708 * zero, and the fraction is adjacent to the float64 implicit bit.
1710 switch (p->cls) {
1711 case float_class_normal:
1712 if (unlikely(p->exp == 0)) {
1714 * The result is denormal for float32, but can be represented
1715 * in normalized form for float64. Adjust, per canonicalize.
1717 int shift = frac_normalize(p);
1718 p->exp = (float32_params.frac_shift -
1719 float32_params.exp_bias - shift + 1 +
1720 float64_params.exp_bias);
1721 frac_shr(p, float64_params.frac_shift);
1722 } else {
1723 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1724 p->exp += float64_params.exp_bias - float32_params.exp_bias;
1726 break;
1727 case float_class_snan:
1728 case float_class_qnan:
1729 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1730 p->exp = float64_params.exp_max;
1731 break;
1732 case float_class_inf:
1733 p->exp = float64_params.exp_max;
1734 break;
1735 case float_class_zero:
1736 break;
1737 default:
1738 g_assert_not_reached();
1741 return float64_pack_raw(p);
1744 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1745 float_status *s)
1747 float128_unpack_raw(p, f);
1748 parts_canonicalize(p, s, &float128_params);
1751 static float128 float128_round_pack_canonical(FloatParts128 *p,
1752 float_status *s)
1754 parts_uncanon(p, s, &float128_params);
1755 return float128_pack_raw(p);
1758 /* Returns false if the encoding is invalid. */
1759 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1760 float_status *s)
1762 /* Ensure rounding precision is set before beginning. */
1763 switch (s->floatx80_rounding_precision) {
1764 case floatx80_precision_x:
1765 case floatx80_precision_d:
1766 case floatx80_precision_s:
1767 break;
1768 default:
1769 g_assert_not_reached();
1772 if (unlikely(floatx80_invalid_encoding(f))) {
1773 float_raise(float_flag_invalid, s);
1774 return false;
1777 floatx80_unpack_raw(p, f);
1779 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1780 parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1781 } else {
1782 /* The explicit integer bit is ignored, after invalid checks. */
1783 p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1784 p->cls = (p->frac_hi == 0 ? float_class_inf
1785 : parts_is_snan_frac(p->frac_hi, s)
1786 ? float_class_snan : float_class_qnan);
1788 return true;
1791 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1792 float_status *s)
1794 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1795 uint64_t frac;
1796 int exp;
1798 switch (p->cls) {
1799 case float_class_normal:
1800 if (s->floatx80_rounding_precision == floatx80_precision_x) {
1801 parts_uncanon_normal(p, s, fmt);
1802 frac = p->frac_hi;
1803 exp = p->exp;
1804 } else {
1805 FloatParts64 p64;
1807 p64.sign = p->sign;
1808 p64.exp = p->exp;
1809 frac_truncjam(&p64, p);
1810 parts_uncanon_normal(&p64, s, fmt);
1811 frac = p64.frac;
1812 exp = p64.exp;
1814 if (exp != fmt->exp_max) {
1815 break;
1817 /* rounded to inf -- fall through to set frac correctly */
1819 case float_class_inf:
1820 /* x86 and m68k differ in the setting of the integer bit. */
1821 frac = floatx80_infinity_low;
1822 exp = fmt->exp_max;
1823 break;
1825 case float_class_zero:
1826 frac = 0;
1827 exp = 0;
1828 break;
1830 case float_class_snan:
1831 case float_class_qnan:
1832 /* NaNs have the integer bit set. */
1833 frac = p->frac_hi | (1ull << 63);
1834 exp = fmt->exp_max;
1835 break;
1837 default:
1838 g_assert_not_reached();
1841 return packFloatx80(p->sign, exp, frac);
1845 * Addition and subtraction
1848 static float16 QEMU_FLATTEN
1849 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1851 FloatParts64 pa, pb, *pr;
1853 float16_unpack_canonical(&pa, a, status);
1854 float16_unpack_canonical(&pb, b, status);
1855 pr = parts_addsub(&pa, &pb, status, subtract);
1857 return float16_round_pack_canonical(pr, status);
1860 float16 float16_add(float16 a, float16 b, float_status *status)
1862 return float16_addsub(a, b, status, false);
1865 float16 float16_sub(float16 a, float16 b, float_status *status)
1867 return float16_addsub(a, b, status, true);
1870 static float32 QEMU_SOFTFLOAT_ATTR
1871 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1873 FloatParts64 pa, pb, *pr;
1875 float32_unpack_canonical(&pa, a, status);
1876 float32_unpack_canonical(&pb, b, status);
1877 pr = parts_addsub(&pa, &pb, status, subtract);
1879 return float32_round_pack_canonical(pr, status);
1882 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1884 return soft_f32_addsub(a, b, status, false);
1887 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1889 return soft_f32_addsub(a, b, status, true);
1892 static float64 QEMU_SOFTFLOAT_ATTR
1893 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1895 FloatParts64 pa, pb, *pr;
1897 float64_unpack_canonical(&pa, a, status);
1898 float64_unpack_canonical(&pb, b, status);
1899 pr = parts_addsub(&pa, &pb, status, subtract);
1901 return float64_round_pack_canonical(pr, status);
1904 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1906 return soft_f64_addsub(a, b, status, false);
1909 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1911 return soft_f64_addsub(a, b, status, true);
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}

/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}

/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}

/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1934 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1936 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1937 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1939 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1942 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1944 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1945 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1946 } else {
1947 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1951 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1952 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1954 return float32_gen2(a, b, s, hard, soft,
1955 f32_is_zon2, f32_addsubmul_post);
1958 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1959 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1961 return float64_gen2(a, b, s, hard, soft,
1962 f64_is_zon2, f64_addsubmul_post);
1965 float32 QEMU_FLATTEN
1966 float32_add(float32 a, float32 b, float_status *s)
1968 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1971 float32 QEMU_FLATTEN
1972 float32_sub(float32 a, float32 b, float_status *s)
1974 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1977 float64 QEMU_FLATTEN
1978 float64_add(float64 a, float64 b, float_status *s)
1980 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1983 float64 QEMU_FLATTEN
1984 float64_sub(float64 a, float64 b, float_status *s)
1986 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1989 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1990 bool subtract)
1992 FloatParts64 pa, pb, *pr;
1994 float64_unpack_canonical(&pa, a, status);
1995 float64_unpack_canonical(&pb, b, status);
1996 pr = parts_addsub(&pa, &pb, status, subtract);
1998 return float64r32_round_pack_canonical(pr, status);
2001 float64 float64r32_add(float64 a, float64 b, float_status *status)
2003 return float64r32_addsub(a, b, status, false);
2006 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2008 return float64r32_addsub(a, b, status, true);
2011 static bfloat16 QEMU_FLATTEN
2012 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2014 FloatParts64 pa, pb, *pr;
2016 bfloat16_unpack_canonical(&pa, a, status);
2017 bfloat16_unpack_canonical(&pb, b, status);
2018 pr = parts_addsub(&pa, &pb, status, subtract);
2020 return bfloat16_round_pack_canonical(pr, status);
2023 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2025 return bfloat16_addsub(a, b, status, false);
2028 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2030 return bfloat16_addsub(a, b, status, true);
2033 static float128 QEMU_FLATTEN
2034 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2036 FloatParts128 pa, pb, *pr;
2038 float128_unpack_canonical(&pa, a, status);
2039 float128_unpack_canonical(&pb, b, status);
2040 pr = parts_addsub(&pa, &pb, status, subtract);
2042 return float128_round_pack_canonical(pr, status);
2045 float128 float128_add(float128 a, float128 b, float_status *status)
2047 return float128_addsub(a, b, status, false);
2050 float128 float128_sub(float128 a, float128 b, float_status *status)
2052 return float128_addsub(a, b, status, true);
2055 static floatx80 QEMU_FLATTEN
2056 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2058 FloatParts128 pa, pb, *pr;
2060 if (!floatx80_unpack_canonical(&pa, a, status) ||
2061 !floatx80_unpack_canonical(&pb, b, status)) {
2062 return floatx80_default_nan(status);
2065 pr = parts_addsub(&pa, &pb, status, subtract);
2066 return floatx80_round_pack_canonical(pr, status);
2069 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2071 return floatx80_addsub(a, b, status, false);
2074 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2076 return floatx80_addsub(a, b, status, true);
2080 * Multiplication
2083 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2085 FloatParts64 pa, pb, *pr;
2087 float16_unpack_canonical(&pa, a, status);
2088 float16_unpack_canonical(&pb, b, status);
2089 pr = parts_mul(&pa, &pb, status);
2091 return float16_round_pack_canonical(pr, status);
2094 static float32 QEMU_SOFTFLOAT_ATTR
2095 soft_f32_mul(float32 a, float32 b, float_status *status)
2097 FloatParts64 pa, pb, *pr;
2099 float32_unpack_canonical(&pa, a, status);
2100 float32_unpack_canonical(&pb, b, status);
2101 pr = parts_mul(&pa, &pb, status);
2103 return float32_round_pack_canonical(pr, status);
2106 static float64 QEMU_SOFTFLOAT_ATTR
2107 soft_f64_mul(float64 a, float64 b, float_status *status)
2109 FloatParts64 pa, pb, *pr;
2111 float64_unpack_canonical(&pa, a, status);
2112 float64_unpack_canonical(&pb, b, status);
2113 pr = parts_mul(&pa, &pb, status);
2115 return float64_round_pack_canonical(pr, status);
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    const float prod = a * b;

    return prod;
}

/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    const double prod = a * b;

    return prod;
}
2128 float32 QEMU_FLATTEN
2129 float32_mul(float32 a, float32 b, float_status *s)
2131 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2132 f32_is_zon2, f32_addsubmul_post);
2135 float64 QEMU_FLATTEN
2136 float64_mul(float64 a, float64 b, float_status *s)
2138 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2139 f64_is_zon2, f64_addsubmul_post);
2142 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2144 FloatParts64 pa, pb, *pr;
2146 float64_unpack_canonical(&pa, a, status);
2147 float64_unpack_canonical(&pb, b, status);
2148 pr = parts_mul(&pa, &pb, status);
2150 return float64r32_round_pack_canonical(pr, status);
2153 bfloat16 QEMU_FLATTEN
2154 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2156 FloatParts64 pa, pb, *pr;
2158 bfloat16_unpack_canonical(&pa, a, status);
2159 bfloat16_unpack_canonical(&pb, b, status);
2160 pr = parts_mul(&pa, &pb, status);
2162 return bfloat16_round_pack_canonical(pr, status);
2165 float128 QEMU_FLATTEN
2166 float128_mul(float128 a, float128 b, float_status *status)
2168 FloatParts128 pa, pb, *pr;
2170 float128_unpack_canonical(&pa, a, status);
2171 float128_unpack_canonical(&pb, b, status);
2172 pr = parts_mul(&pa, &pb, status);
2174 return float128_round_pack_canonical(pr, status);
2177 floatx80 QEMU_FLATTEN
2178 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2180 FloatParts128 pa, pb, *pr;
2182 if (!floatx80_unpack_canonical(&pa, a, status) ||
2183 !floatx80_unpack_canonical(&pb, b, status)) {
2184 return floatx80_default_nan(status);
2187 pr = parts_mul(&pa, &pb, status);
2188 return floatx80_round_pack_canonical(pr, status);
2192 * Fused multiply-add
2195 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2196 int flags, float_status *status)
2198 FloatParts64 pa, pb, pc, *pr;
2200 float16_unpack_canonical(&pa, a, status);
2201 float16_unpack_canonical(&pb, b, status);
2202 float16_unpack_canonical(&pc, c, status);
2203 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2205 return float16_round_pack_canonical(pr, status);
2208 static float32 QEMU_SOFTFLOAT_ATTR
2209 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2210 float_status *status)
2212 FloatParts64 pa, pb, pc, *pr;
2214 float32_unpack_canonical(&pa, a, status);
2215 float32_unpack_canonical(&pb, b, status);
2216 float32_unpack_canonical(&pc, c, status);
2217 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2219 return float32_round_pack_canonical(pr, status);
2222 static float64 QEMU_SOFTFLOAT_ATTR
2223 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2224 float_status *status)
2226 FloatParts64 pa, pb, pc, *pr;
2228 float64_unpack_canonical(&pa, a, status);
2229 float64_unpack_canonical(&pb, b, status);
2230 float64_unpack_canonical(&pc, c, status);
2231 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2233 return float64_round_pack_canonical(pr, status);
2236 static bool force_soft_fma;
2238 float32 QEMU_FLATTEN
2239 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2241 union_float32 ua, ub, uc, ur;
2243 ua.s = xa;
2244 ub.s = xb;
2245 uc.s = xc;
2247 if (unlikely(!can_use_fpu(s))) {
2248 goto soft;
2250 if (unlikely(flags & float_muladd_halve_result)) {
2251 goto soft;
2254 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2255 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2256 goto soft;
2259 if (unlikely(force_soft_fma)) {
2260 goto soft;
2264 * When (a || b) == 0, there's no need to check for under/over flow,
2265 * since we know the addend is (normal || 0) and the product is 0.
2267 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2268 union_float32 up;
2269 bool prod_sign;
2271 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2272 prod_sign ^= !!(flags & float_muladd_negate_product);
2273 up.s = float32_set_sign(float32_zero, prod_sign);
2275 if (flags & float_muladd_negate_c) {
2276 uc.h = -uc.h;
2278 ur.h = up.h + uc.h;
2279 } else {
2280 union_float32 ua_orig = ua;
2281 union_float32 uc_orig = uc;
2283 if (flags & float_muladd_negate_product) {
2284 ua.h = -ua.h;
2286 if (flags & float_muladd_negate_c) {
2287 uc.h = -uc.h;
2290 ur.h = fmaf(ua.h, ub.h, uc.h);
2292 if (unlikely(f32_is_inf(ur))) {
2293 float_raise(float_flag_overflow, s);
2294 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2295 ua = ua_orig;
2296 uc = uc_orig;
2297 goto soft;
2300 if (flags & float_muladd_negate_result) {
2301 return float32_chs(ur.s);
2303 return ur.s;
2305 soft:
2306 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2309 float64 QEMU_FLATTEN
2310 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2312 union_float64 ua, ub, uc, ur;
2314 ua.s = xa;
2315 ub.s = xb;
2316 uc.s = xc;
2318 if (unlikely(!can_use_fpu(s))) {
2319 goto soft;
2321 if (unlikely(flags & float_muladd_halve_result)) {
2322 goto soft;
2325 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2326 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2327 goto soft;
2330 if (unlikely(force_soft_fma)) {
2331 goto soft;
2335 * When (a || b) == 0, there's no need to check for under/over flow,
2336 * since we know the addend is (normal || 0) and the product is 0.
2338 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2339 union_float64 up;
2340 bool prod_sign;
2342 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2343 prod_sign ^= !!(flags & float_muladd_negate_product);
2344 up.s = float64_set_sign(float64_zero, prod_sign);
2346 if (flags & float_muladd_negate_c) {
2347 uc.h = -uc.h;
2349 ur.h = up.h + uc.h;
2350 } else {
2351 union_float64 ua_orig = ua;
2352 union_float64 uc_orig = uc;
2354 if (flags & float_muladd_negate_product) {
2355 ua.h = -ua.h;
2357 if (flags & float_muladd_negate_c) {
2358 uc.h = -uc.h;
2361 ur.h = fma(ua.h, ub.h, uc.h);
2363 if (unlikely(f64_is_inf(ur))) {
2364 float_raise(float_flag_overflow, s);
2365 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2366 ua = ua_orig;
2367 uc = uc_orig;
2368 goto soft;
2371 if (flags & float_muladd_negate_result) {
2372 return float64_chs(ur.s);
2374 return ur.s;
2376 soft:
2377 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2380 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2381 int flags, float_status *status)
2383 FloatParts64 pa, pb, pc, *pr;
2385 float64_unpack_canonical(&pa, a, status);
2386 float64_unpack_canonical(&pb, b, status);
2387 float64_unpack_canonical(&pc, c, status);
2388 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2390 return float64r32_round_pack_canonical(pr, status);
2393 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2394 int flags, float_status *status)
2396 FloatParts64 pa, pb, pc, *pr;
2398 bfloat16_unpack_canonical(&pa, a, status);
2399 bfloat16_unpack_canonical(&pb, b, status);
2400 bfloat16_unpack_canonical(&pc, c, status);
2401 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2403 return bfloat16_round_pack_canonical(pr, status);
2406 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2407 int flags, float_status *status)
2409 FloatParts128 pa, pb, pc, *pr;
2411 float128_unpack_canonical(&pa, a, status);
2412 float128_unpack_canonical(&pb, b, status);
2413 float128_unpack_canonical(&pc, c, status);
2414 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2416 return float128_round_pack_canonical(pr, status);
/*
 * Division
 */
2423 float16 float16_div(float16 a, float16 b, float_status *status)
2425 FloatParts64 pa, pb, *pr;
2427 float16_unpack_canonical(&pa, a, status);
2428 float16_unpack_canonical(&pb, b, status);
2429 pr = parts_div(&pa, &pb, status);
2431 return float16_round_pack_canonical(pr, status);
2434 static float32 QEMU_SOFTFLOAT_ATTR
2435 soft_f32_div(float32 a, float32 b, float_status *status)
2437 FloatParts64 pa, pb, *pr;
2439 float32_unpack_canonical(&pa, a, status);
2440 float32_unpack_canonical(&pb, b, status);
2441 pr = parts_div(&pa, &pb, status);
2443 return float32_round_pack_canonical(pr, status);
2446 static float64 QEMU_SOFTFLOAT_ATTR
2447 soft_f64_div(float64 a, float64 b, float_status *status)
2449 FloatParts64 pa, pb, *pr;
2451 float64_unpack_canonical(&pa, a, status);
2452 float64_unpack_canonical(&pb, b, status);
2453 pr = parts_div(&pa, &pb, status);
2455 return float64_round_pack_canonical(pr, status);
/* Host single-precision division; the hard implementation for float32_div(). */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
/* Host double-precision division; the hard implementation for float64_div(). */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
2468 static bool f32_div_pre(union_float32 a, union_float32 b)
2470 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2471 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2472 fpclassify(b.h) == FP_NORMAL;
2474 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2477 static bool f64_div_pre(union_float64 a, union_float64 b)
2479 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2480 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2481 fpclassify(b.h) == FP_NORMAL;
2483 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2486 static bool f32_div_post(union_float32 a, union_float32 b)
2488 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2489 return fpclassify(a.h) != FP_ZERO;
2491 return !float32_is_zero(a.s);
2494 static bool f64_div_post(union_float64 a, union_float64 b)
2496 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2497 return fpclassify(a.h) != FP_ZERO;
2499 return !float64_is_zero(a.s);
2502 float32 QEMU_FLATTEN
2503 float32_div(float32 a, float32 b, float_status *s)
2505 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2506 f32_div_pre, f32_div_post);
2509 float64 QEMU_FLATTEN
2510 float64_div(float64 a, float64 b, float_status *s)
2512 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2513 f64_div_pre, f64_div_post);
2516 float64 float64r32_div(float64 a, float64 b, float_status *status)
2518 FloatParts64 pa, pb, *pr;
2520 float64_unpack_canonical(&pa, a, status);
2521 float64_unpack_canonical(&pb, b, status);
2522 pr = parts_div(&pa, &pb, status);
2524 return float64r32_round_pack_canonical(pr, status);
2527 bfloat16 QEMU_FLATTEN
2528 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2530 FloatParts64 pa, pb, *pr;
2532 bfloat16_unpack_canonical(&pa, a, status);
2533 bfloat16_unpack_canonical(&pb, b, status);
2534 pr = parts_div(&pa, &pb, status);
2536 return bfloat16_round_pack_canonical(pr, status);
2539 float128 QEMU_FLATTEN
2540 float128_div(float128 a, float128 b, float_status *status)
2542 FloatParts128 pa, pb, *pr;
2544 float128_unpack_canonical(&pa, a, status);
2545 float128_unpack_canonical(&pb, b, status);
2546 pr = parts_div(&pa, &pb, status);
2548 return float128_round_pack_canonical(pr, status);
2551 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2553 FloatParts128 pa, pb, *pr;
2555 if (!floatx80_unpack_canonical(&pa, a, status) ||
2556 !floatx80_unpack_canonical(&pb, b, status)) {
2557 return floatx80_default_nan(status);
2560 pr = parts_div(&pa, &pb, status);
2561 return floatx80_round_pack_canonical(pr, status);
/*
 * Remainder
 */
2568 float32 float32_rem(float32 a, float32 b, float_status *status)
2570 FloatParts64 pa, pb, *pr;
2572 float32_unpack_canonical(&pa, a, status);
2573 float32_unpack_canonical(&pb, b, status);
2574 pr = parts_modrem(&pa, &pb, NULL, status);
2576 return float32_round_pack_canonical(pr, status);
2579 float64 float64_rem(float64 a, float64 b, float_status *status)
2581 FloatParts64 pa, pb, *pr;
2583 float64_unpack_canonical(&pa, a, status);
2584 float64_unpack_canonical(&pb, b, status);
2585 pr = parts_modrem(&pa, &pb, NULL, status);
2587 return float64_round_pack_canonical(pr, status);
2590 float128 float128_rem(float128 a, float128 b, float_status *status)
2592 FloatParts128 pa, pb, *pr;
2594 float128_unpack_canonical(&pa, a, status);
2595 float128_unpack_canonical(&pb, b, status);
2596 pr = parts_modrem(&pa, &pb, NULL, status);
2598 return float128_round_pack_canonical(pr, status);
2602 * Returns the remainder of the extended double-precision floating-point value
2603 * `a' with respect to the corresponding value `b'.
2604 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2605 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2606 * the remainder based on truncating the quotient toward zero instead and
2607 * *quotient is set to the low 64 bits of the absolute value of the integer
2608 * quotient.
2610 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2611 uint64_t *quotient, float_status *status)
2613 FloatParts128 pa, pb, *pr;
2615 *quotient = 0;
2616 if (!floatx80_unpack_canonical(&pa, a, status) ||
2617 !floatx80_unpack_canonical(&pb, b, status)) {
2618 return floatx80_default_nan(status);
2620 pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2622 return floatx80_round_pack_canonical(pr, status);
2625 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2627 uint64_t quotient;
2628 return floatx80_modrem(a, b, false, &quotient, status);
2631 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2633 uint64_t quotient;
2634 return floatx80_modrem(a, b, true, &quotient, status);
/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */
2648 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2650 switch (a->cls) {
2651 case float_class_snan:
2652 float_raise(float_flag_invalid_snan, s);
2653 /* fall through */
2654 case float_class_qnan:
2656 * There is no NaN in the destination format. Raise Invalid
2657 * and return a zero with the sign of the input NaN.
2659 float_raise(float_flag_invalid, s);
2660 a->cls = float_class_zero;
2661 break;
2663 case float_class_inf:
2665 * There is no Inf in the destination format. Raise Invalid
2666 * and return the maximum normal with the correct sign.
2668 float_raise(float_flag_invalid, s);
2669 a->cls = float_class_normal;
2670 a->exp = float16_params_ahp.exp_max;
2671 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2672 float16_params_ahp.frac_size + 1);
2673 break;
2675 case float_class_normal:
2676 case float_class_zero:
2677 break;
2679 default:
2680 g_assert_not_reached();
2684 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2686 if (is_nan(a->cls)) {
2687 parts_return_nan(a, s);
2691 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2693 if (is_nan(a->cls)) {
2694 parts_return_nan(a, s);
/* Pick parts64_float_to_float() or parts128_float_to_float() from P's type. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2701 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2702 float_status *s)
2704 a->cls = b->cls;
2705 a->sign = b->sign;
2706 a->exp = b->exp;
2708 if (a->cls == float_class_normal) {
2709 frac_truncjam(a, b);
2710 } else if (is_nan(a->cls)) {
2711 /* Discard the low bits of the NaN. */
2712 a->frac = b->frac_hi;
2713 parts_return_nan(a, s);
2717 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2718 float_status *s)
2720 a->cls = b->cls;
2721 a->sign = b->sign;
2722 a->exp = b->exp;
2723 frac_widen(a, b);
2725 if (is_nan(a->cls)) {
2726 parts_return_nan(a, s);
2730 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2732 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2733 FloatParts64 p;
2735 float16a_unpack_canonical(&p, a, s, fmt16);
2736 parts_float_to_float(&p, s);
2737 return float32_round_pack_canonical(&p, s);
2740 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2742 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2743 FloatParts64 p;
2745 float16a_unpack_canonical(&p, a, s, fmt16);
2746 parts_float_to_float(&p, s);
2747 return float64_round_pack_canonical(&p, s);
2750 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2752 FloatParts64 p;
2753 const FloatFmt *fmt;
2755 float32_unpack_canonical(&p, a, s);
2756 if (ieee) {
2757 parts_float_to_float(&p, s);
2758 fmt = &float16_params;
2759 } else {
2760 parts_float_to_ahp(&p, s);
2761 fmt = &float16_params_ahp;
2763 return float16a_round_pack_canonical(&p, s, fmt);
2766 static float64 QEMU_SOFTFLOAT_ATTR
2767 soft_float32_to_float64(float32 a, float_status *s)
2769 FloatParts64 p;
2771 float32_unpack_canonical(&p, a, s);
2772 parts_float_to_float(&p, s);
2773 return float64_round_pack_canonical(&p, s);
2776 float64 float32_to_float64(float32 a, float_status *s)
2778 if (likely(float32_is_normal(a))) {
2779 /* Widening conversion can never produce inexact results. */
2780 union_float32 uf;
2781 union_float64 ud;
2782 uf.s = a;
2783 ud.h = uf.h;
2784 return ud.s;
2785 } else if (float32_is_zero(a)) {
2786 return float64_set_sign(float64_zero, float32_is_neg(a));
2787 } else {
2788 return soft_float32_to_float64(a, s);
2792 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2794 FloatParts64 p;
2795 const FloatFmt *fmt;
2797 float64_unpack_canonical(&p, a, s);
2798 if (ieee) {
2799 parts_float_to_float(&p, s);
2800 fmt = &float16_params;
2801 } else {
2802 parts_float_to_ahp(&p, s);
2803 fmt = &float16_params_ahp;
2805 return float16a_round_pack_canonical(&p, s, fmt);
2808 float32 float64_to_float32(float64 a, float_status *s)
2810 FloatParts64 p;
2812 float64_unpack_canonical(&p, a, s);
2813 parts_float_to_float(&p, s);
2814 return float32_round_pack_canonical(&p, s);
2817 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2819 FloatParts64 p;
2821 bfloat16_unpack_canonical(&p, a, s);
2822 parts_float_to_float(&p, s);
2823 return float32_round_pack_canonical(&p, s);
2826 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2828 FloatParts64 p;
2830 bfloat16_unpack_canonical(&p, a, s);
2831 parts_float_to_float(&p, s);
2832 return float64_round_pack_canonical(&p, s);
2835 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2837 FloatParts64 p;
2839 float32_unpack_canonical(&p, a, s);
2840 parts_float_to_float(&p, s);
2841 return bfloat16_round_pack_canonical(&p, s);
2844 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2846 FloatParts64 p;
2848 float64_unpack_canonical(&p, a, s);
2849 parts_float_to_float(&p, s);
2850 return bfloat16_round_pack_canonical(&p, s);
2853 float32 float128_to_float32(float128 a, float_status *s)
2855 FloatParts64 p64;
2856 FloatParts128 p128;
2858 float128_unpack_canonical(&p128, a, s);
2859 parts_float_to_float_narrow(&p64, &p128, s);
2860 return float32_round_pack_canonical(&p64, s);
2863 float64 float128_to_float64(float128 a, float_status *s)
2865 FloatParts64 p64;
2866 FloatParts128 p128;
2868 float128_unpack_canonical(&p128, a, s);
2869 parts_float_to_float_narrow(&p64, &p128, s);
2870 return float64_round_pack_canonical(&p64, s);
2873 float128 float32_to_float128(float32 a, float_status *s)
2875 FloatParts64 p64;
2876 FloatParts128 p128;
2878 float32_unpack_canonical(&p64, a, s);
2879 parts_float_to_float_widen(&p128, &p64, s);
2880 return float128_round_pack_canonical(&p128, s);
2883 float128 float64_to_float128(float64 a, float_status *s)
2885 FloatParts64 p64;
2886 FloatParts128 p128;
2888 float64_unpack_canonical(&p64, a, s);
2889 parts_float_to_float_widen(&p128, &p64, s);
2890 return float128_round_pack_canonical(&p128, s);
2893 float32 floatx80_to_float32(floatx80 a, float_status *s)
2895 FloatParts64 p64;
2896 FloatParts128 p128;
2898 if (floatx80_unpack_canonical(&p128, a, s)) {
2899 parts_float_to_float_narrow(&p64, &p128, s);
2900 } else {
2901 parts_default_nan(&p64, s);
2903 return float32_round_pack_canonical(&p64, s);
2906 float64 floatx80_to_float64(floatx80 a, float_status *s)
2908 FloatParts64 p64;
2909 FloatParts128 p128;
2911 if (floatx80_unpack_canonical(&p128, a, s)) {
2912 parts_float_to_float_narrow(&p64, &p128, s);
2913 } else {
2914 parts_default_nan(&p64, s);
2916 return float64_round_pack_canonical(&p64, s);
2919 float128 floatx80_to_float128(floatx80 a, float_status *s)
2921 FloatParts128 p;
2923 if (floatx80_unpack_canonical(&p, a, s)) {
2924 parts_float_to_float(&p, s);
2925 } else {
2926 parts_default_nan(&p, s);
2928 return float128_round_pack_canonical(&p, s);
2931 floatx80 float32_to_floatx80(float32 a, float_status *s)
2933 FloatParts64 p64;
2934 FloatParts128 p128;
2936 float32_unpack_canonical(&p64, a, s);
2937 parts_float_to_float_widen(&p128, &p64, s);
2938 return floatx80_round_pack_canonical(&p128, s);
2941 floatx80 float64_to_floatx80(float64 a, float_status *s)
2943 FloatParts64 p64;
2944 FloatParts128 p128;
2946 float64_unpack_canonical(&p64, a, s);
2947 parts_float_to_float_widen(&p128, &p64, s);
2948 return floatx80_round_pack_canonical(&p128, s);
2951 floatx80 float128_to_floatx80(float128 a, float_status *s)
2953 FloatParts128 p;
2955 float128_unpack_canonical(&p, a, s);
2956 parts_float_to_float(&p, s);
2957 return floatx80_round_pack_canonical(&p, s);
/*
 * Round to integral value
 */
2964 float16 float16_round_to_int(float16 a, float_status *s)
2966 FloatParts64 p;
2968 float16_unpack_canonical(&p, a, s);
2969 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2970 return float16_round_pack_canonical(&p, s);
2973 float32 float32_round_to_int(float32 a, float_status *s)
2975 FloatParts64 p;
2977 float32_unpack_canonical(&p, a, s);
2978 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2979 return float32_round_pack_canonical(&p, s);
2982 float64 float64_round_to_int(float64 a, float_status *s)
2984 FloatParts64 p;
2986 float64_unpack_canonical(&p, a, s);
2987 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2988 return float64_round_pack_canonical(&p, s);
2991 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2993 FloatParts64 p;
2995 bfloat16_unpack_canonical(&p, a, s);
2996 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2997 return bfloat16_round_pack_canonical(&p, s);
3000 float128 float128_round_to_int(float128 a, float_status *s)
3002 FloatParts128 p;
3004 float128_unpack_canonical(&p, a, s);
3005 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3006 return float128_round_pack_canonical(&p, s);
3009 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3011 FloatParts128 p;
3013 if (!floatx80_unpack_canonical(&p, a, status)) {
3014 return floatx80_default_nan(status);
3017 parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3018 &floatx80_params[status->floatx80_rounding_precision]);
3019 return floatx80_round_pack_canonical(&p, status);
/*
 * Floating-point to signed integer conversions
 */
3026 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3027 float_status *s)
3029 FloatParts64 p;
3031 float16_unpack_canonical(&p, a, s);
3032 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3035 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3036 float_status *s)
3038 FloatParts64 p;
3040 float16_unpack_canonical(&p, a, s);
3041 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3044 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3045 float_status *s)
3047 FloatParts64 p;
3049 float16_unpack_canonical(&p, a, s);
3050 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3053 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3054 float_status *s)
3056 FloatParts64 p;
3058 float16_unpack_canonical(&p, a, s);
3059 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3062 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3063 float_status *s)
3065 FloatParts64 p;
3067 float32_unpack_canonical(&p, a, s);
3068 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3071 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3072 float_status *s)
3074 FloatParts64 p;
3076 float32_unpack_canonical(&p, a, s);
3077 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3080 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3081 float_status *s)
3083 FloatParts64 p;
3085 float32_unpack_canonical(&p, a, s);
3086 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3089 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3090 float_status *s)
3092 FloatParts64 p;
3094 float64_unpack_canonical(&p, a, s);
3095 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3098 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3099 float_status *s)
3101 FloatParts64 p;
3103 float64_unpack_canonical(&p, a, s);
3104 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3107 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3108 float_status *s)
3110 FloatParts64 p;
3112 float64_unpack_canonical(&p, a, s);
3113 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3116 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3117 float_status *s)
3119 FloatParts64 p;
3121 bfloat16_unpack_canonical(&p, a, s);
3122 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3125 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3126 float_status *s)
3128 FloatParts64 p;
3130 bfloat16_unpack_canonical(&p, a, s);
3131 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3134 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3135 float_status *s)
3137 FloatParts64 p;
3139 bfloat16_unpack_canonical(&p, a, s);
3140 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3143 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3144 int scale, float_status *s)
3146 FloatParts128 p;
3148 float128_unpack_canonical(&p, a, s);
3149 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3152 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3153 int scale, float_status *s)
3155 FloatParts128 p;
3157 float128_unpack_canonical(&p, a, s);
3158 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3161 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3162 int scale, float_status *s)
3164 int flags = 0;
3165 Int128 r;
3166 FloatParts128 p;
3168 float128_unpack_canonical(&p, a, s);
3170 switch (p.cls) {
3171 case float_class_snan:
3172 flags |= float_flag_invalid_snan;
3173 /* fall through */
3174 case float_class_qnan:
3175 flags |= float_flag_invalid;
3176 r = UINT128_MAX;
3177 break;
3179 case float_class_inf:
3180 flags = float_flag_invalid | float_flag_invalid_cvti;
3181 r = p.sign ? INT128_MIN : INT128_MAX;
3182 break;
3184 case float_class_zero:
3185 return int128_zero();
3187 case float_class_normal:
3188 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3189 flags = float_flag_inexact;
3192 if (p.exp < 127) {
3193 int shift = 127 - p.exp;
3194 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3195 if (p.sign) {
3196 r = int128_neg(r);
3198 } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3199 p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3200 r = INT128_MIN;
3201 } else {
3202 flags = float_flag_invalid | float_flag_invalid_cvti;
3203 r = p.sign ? INT128_MIN : INT128_MAX;
3205 break;
3207 default:
3208 g_assert_not_reached();
3211 float_raise(flags, s);
3212 return r;
3215 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3216 int scale, float_status *s)
3218 FloatParts128 p;
3220 if (!floatx80_unpack_canonical(&p, a, s)) {
3221 parts_default_nan(&p, s);
3223 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3226 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3227 int scale, float_status *s)
3229 FloatParts128 p;
3231 if (!floatx80_unpack_canonical(&p, a, s)) {
3232 parts_default_nan(&p, s);
3234 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3237 int8_t float16_to_int8(float16 a, float_status *s)
3239 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3242 int16_t float16_to_int16(float16 a, float_status *s)
3244 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3247 int32_t float16_to_int32(float16 a, float_status *s)
3249 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3252 int64_t float16_to_int64(float16 a, float_status *s)
3254 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3257 int16_t float32_to_int16(float32 a, float_status *s)
3259 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3262 int32_t float32_to_int32(float32 a, float_status *s)
3264 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3267 int64_t float32_to_int64(float32 a, float_status *s)
3269 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3272 int16_t float64_to_int16(float64 a, float_status *s)
3274 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3277 int32_t float64_to_int32(float64 a, float_status *s)
3279 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3282 int64_t float64_to_int64(float64 a, float_status *s)
3284 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3287 int32_t float128_to_int32(float128 a, float_status *s)
3289 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3292 int64_t float128_to_int64(float128 a, float_status *s)
3294 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3297 Int128 float128_to_int128(float128 a, float_status *s)
3299 return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3302 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3304 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3307 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3309 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3312 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3314 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3317 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3319 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3322 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3324 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3327 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3329 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3332 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3334 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3337 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3339 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3342 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3344 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3347 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3349 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3352 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3354 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3357 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3359 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3362 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3364 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3367 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3369 return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3372 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3374 return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3377 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3379 return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3382 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3384 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3387 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3389 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3392 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3394 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3397 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3399 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3402 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3404 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3407 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3409 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
/*
 * Floating-point to unsigned integer conversions
 */
3416 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3417 float_status *s)
3419 FloatParts64 p;
3421 float16_unpack_canonical(&p, a, s);
3422 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3425 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3426 float_status *s)
3428 FloatParts64 p;
3430 float16_unpack_canonical(&p, a, s);
3431 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3434 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3435 float_status *s)
3437 FloatParts64 p;
3439 float16_unpack_canonical(&p, a, s);
3440 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3443 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3444 float_status *s)
3446 FloatParts64 p;
3448 float16_unpack_canonical(&p, a, s);
3449 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3452 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3453 float_status *s)
3455 FloatParts64 p;
3457 float32_unpack_canonical(&p, a, s);
3458 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3461 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3462 float_status *s)
3464 FloatParts64 p;
3466 float32_unpack_canonical(&p, a, s);
3467 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3470 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3471 float_status *s)
3473 FloatParts64 p;
3475 float32_unpack_canonical(&p, a, s);
3476 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3479 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3480 float_status *s)
3482 FloatParts64 p;
3484 float64_unpack_canonical(&p, a, s);
3485 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3488 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3489 float_status *s)
3491 FloatParts64 p;
3493 float64_unpack_canonical(&p, a, s);
3494 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3497 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3498 float_status *s)
3500 FloatParts64 p;
3502 float64_unpack_canonical(&p, a, s);
3503 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3506 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3507 int scale, float_status *s)
3509 FloatParts64 p;
3511 bfloat16_unpack_canonical(&p, a, s);
3512 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3515 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3516 int scale, float_status *s)
3518 FloatParts64 p;
3520 bfloat16_unpack_canonical(&p, a, s);
3521 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3524 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3525 int scale, float_status *s)
3527 FloatParts64 p;
3529 bfloat16_unpack_canonical(&p, a, s);
3530 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3533 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3534 int scale, float_status *s)
3536 FloatParts128 p;
3538 float128_unpack_canonical(&p, a, s);
3539 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3542 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3543 int scale, float_status *s)
3545 FloatParts128 p;
3547 float128_unpack_canonical(&p, a, s);
3548 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3551 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3552 int scale, float_status *s)
3554 int flags = 0;
3555 Int128 r;
3556 FloatParts128 p;
3558 float128_unpack_canonical(&p, a, s);
3560 switch (p.cls) {
3561 case float_class_snan:
3562 flags |= float_flag_invalid_snan;
3563 /* fall through */
3564 case float_class_qnan:
3565 flags |= float_flag_invalid;
3566 r = UINT128_MAX;
3567 break;
3569 case float_class_inf:
3570 flags = float_flag_invalid | float_flag_invalid_cvti;
3571 r = p.sign ? int128_zero() : UINT128_MAX;
3572 break;
3574 case float_class_zero:
3575 return int128_zero();
3577 case float_class_normal:
3578 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3579 flags = float_flag_inexact;
3580 if (p.cls == float_class_zero) {
3581 r = int128_zero();
3582 break;
3586 if (p.sign) {
3587 flags = float_flag_invalid | float_flag_invalid_cvti;
3588 r = int128_zero();
3589 } else if (p.exp <= 127) {
3590 int shift = 127 - p.exp;
3591 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3592 } else {
3593 flags = float_flag_invalid | float_flag_invalid_cvti;
3594 r = UINT128_MAX;
3596 break;
3598 default:
3599 g_assert_not_reached();
3602 float_raise(flags, s);
3603 return r;
3606 uint8_t float16_to_uint8(float16 a, float_status *s)
3608 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3611 uint16_t float16_to_uint16(float16 a, float_status *s)
3613 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3616 uint32_t float16_to_uint32(float16 a, float_status *s)
3618 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3621 uint64_t float16_to_uint64(float16 a, float_status *s)
3623 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3626 uint16_t float32_to_uint16(float32 a, float_status *s)
3628 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3631 uint32_t float32_to_uint32(float32 a, float_status *s)
3633 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3636 uint64_t float32_to_uint64(float32 a, float_status *s)
3638 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3641 uint16_t float64_to_uint16(float64 a, float_status *s)
3643 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3646 uint32_t float64_to_uint32(float64 a, float_status *s)
3648 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3651 uint64_t float64_to_uint64(float64 a, float_status *s)
3653 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3656 uint32_t float128_to_uint32(float128 a, float_status *s)
3658 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3661 uint64_t float128_to_uint64(float128 a, float_status *s)
3663 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3666 Int128 float128_to_uint128(float128 a, float_status *s)
3668 return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3671 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3673 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3676 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3678 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3681 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3683 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3686 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3688 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3691 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3693 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3696 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3698 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3701 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3703 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3706 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3708 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3711 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3713 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3716 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3718 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3721 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3723 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3726 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3728 return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3731 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3733 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3736 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3738 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3741 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3743 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3746 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3748 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3751 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3753 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3756 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3758 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
/*
 * Signed integer to floating-point conversions
 */
3765 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3767 FloatParts64 p;
3769 parts_sint_to_float(&p, a, scale, status);
3770 return float16_round_pack_canonical(&p, status);
3773 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3775 return int64_to_float16_scalbn(a, scale, status);
3778 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3780 return int64_to_float16_scalbn(a, scale, status);
3783 float16 int64_to_float16(int64_t a, float_status *status)
3785 return int64_to_float16_scalbn(a, 0, status);
3788 float16 int32_to_float16(int32_t a, float_status *status)
3790 return int64_to_float16_scalbn(a, 0, status);
3793 float16 int16_to_float16(int16_t a, float_status *status)
3795 return int64_to_float16_scalbn(a, 0, status);
3798 float16 int8_to_float16(int8_t a, float_status *status)
3800 return int64_to_float16_scalbn(a, 0, status);
3803 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3805 FloatParts64 p;
3807 /* Without scaling, there are no overflow concerns. */
3808 if (likely(scale == 0) && can_use_fpu(status)) {
3809 union_float32 ur;
3810 ur.h = a;
3811 return ur.s;
3814 parts64_sint_to_float(&p, a, scale, status);
3815 return float32_round_pack_canonical(&p, status);
3818 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3820 return int64_to_float32_scalbn(a, scale, status);
3823 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3825 return int64_to_float32_scalbn(a, scale, status);
3828 float32 int64_to_float32(int64_t a, float_status *status)
3830 return int64_to_float32_scalbn(a, 0, status);
3833 float32 int32_to_float32(int32_t a, float_status *status)
3835 return int64_to_float32_scalbn(a, 0, status);
3838 float32 int16_to_float32(int16_t a, float_status *status)
3840 return int64_to_float32_scalbn(a, 0, status);
3843 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3845 FloatParts64 p;
3847 /* Without scaling, there are no overflow concerns. */
3848 if (likely(scale == 0) && can_use_fpu(status)) {
3849 union_float64 ur;
3850 ur.h = a;
3851 return ur.s;
3854 parts_sint_to_float(&p, a, scale, status);
3855 return float64_round_pack_canonical(&p, status);
3858 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3860 return int64_to_float64_scalbn(a, scale, status);
3863 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3865 return int64_to_float64_scalbn(a, scale, status);
3868 float64 int64_to_float64(int64_t a, float_status *status)
3870 return int64_to_float64_scalbn(a, 0, status);
3873 float64 int32_to_float64(int32_t a, float_status *status)
3875 return int64_to_float64_scalbn(a, 0, status);
3878 float64 int16_to_float64(int16_t a, float_status *status)
3880 return int64_to_float64_scalbn(a, 0, status);
3883 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3885 FloatParts64 p;
3887 parts_sint_to_float(&p, a, scale, status);
3888 return bfloat16_round_pack_canonical(&p, status);
3891 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3893 return int64_to_bfloat16_scalbn(a, scale, status);
3896 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3898 return int64_to_bfloat16_scalbn(a, scale, status);
3901 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3903 return int64_to_bfloat16_scalbn(a, 0, status);
3906 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3908 return int64_to_bfloat16_scalbn(a, 0, status);
3911 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3913 return int64_to_bfloat16_scalbn(a, 0, status);
3916 float128 int128_to_float128(Int128 a, float_status *status)
3918 FloatParts128 p = { };
3919 int shift;
3921 if (int128_nz(a)) {
3922 p.cls = float_class_normal;
3923 if (!int128_nonneg(a)) {
3924 p.sign = true;
3925 a = int128_neg(a);
3928 shift = clz64(int128_gethi(a));
3929 if (shift == 64) {
3930 shift += clz64(int128_getlo(a));
3933 p.exp = 127 - shift;
3934 a = int128_lshift(a, shift);
3936 p.frac_hi = int128_gethi(a);
3937 p.frac_lo = int128_getlo(a);
3938 } else {
3939 p.cls = float_class_zero;
3942 return float128_round_pack_canonical(&p, status);
3945 float128 int64_to_float128(int64_t a, float_status *status)
3947 FloatParts128 p;
3949 parts_sint_to_float(&p, a, 0, status);
3950 return float128_round_pack_canonical(&p, status);
3953 float128 int32_to_float128(int32_t a, float_status *status)
3955 return int64_to_float128(a, status);
3958 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3960 FloatParts128 p;
3962 parts_sint_to_float(&p, a, 0, status);
3963 return floatx80_round_pack_canonical(&p, status);
3966 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3968 return int64_to_floatx80(a, status);
/*
 * Unsigned Integer to floating-point conversions
 */
3975 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3977 FloatParts64 p;
3979 parts_uint_to_float(&p, a, scale, status);
3980 return float16_round_pack_canonical(&p, status);
3983 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3985 return uint64_to_float16_scalbn(a, scale, status);
3988 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3990 return uint64_to_float16_scalbn(a, scale, status);
3993 float16 uint64_to_float16(uint64_t a, float_status *status)
3995 return uint64_to_float16_scalbn(a, 0, status);
3998 float16 uint32_to_float16(uint32_t a, float_status *status)
4000 return uint64_to_float16_scalbn(a, 0, status);
4003 float16 uint16_to_float16(uint16_t a, float_status *status)
4005 return uint64_to_float16_scalbn(a, 0, status);
4008 float16 uint8_to_float16(uint8_t a, float_status *status)
4010 return uint64_to_float16_scalbn(a, 0, status);
4013 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4015 FloatParts64 p;
4017 /* Without scaling, there are no overflow concerns. */
4018 if (likely(scale == 0) && can_use_fpu(status)) {
4019 union_float32 ur;
4020 ur.h = a;
4021 return ur.s;
4024 parts_uint_to_float(&p, a, scale, status);
4025 return float32_round_pack_canonical(&p, status);
4028 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4030 return uint64_to_float32_scalbn(a, scale, status);
4033 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4035 return uint64_to_float32_scalbn(a, scale, status);
4038 float32 uint64_to_float32(uint64_t a, float_status *status)
4040 return uint64_to_float32_scalbn(a, 0, status);
4043 float32 uint32_to_float32(uint32_t a, float_status *status)
4045 return uint64_to_float32_scalbn(a, 0, status);
4048 float32 uint16_to_float32(uint16_t a, float_status *status)
4050 return uint64_to_float32_scalbn(a, 0, status);
4053 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4055 FloatParts64 p;
4057 /* Without scaling, there are no overflow concerns. */
4058 if (likely(scale == 0) && can_use_fpu(status)) {
4059 union_float64 ur;
4060 ur.h = a;
4061 return ur.s;
4064 parts_uint_to_float(&p, a, scale, status);
4065 return float64_round_pack_canonical(&p, status);
4068 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4070 return uint64_to_float64_scalbn(a, scale, status);
4073 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4075 return uint64_to_float64_scalbn(a, scale, status);
4078 float64 uint64_to_float64(uint64_t a, float_status *status)
4080 return uint64_to_float64_scalbn(a, 0, status);
4083 float64 uint32_to_float64(uint32_t a, float_status *status)
4085 return uint64_to_float64_scalbn(a, 0, status);
4088 float64 uint16_to_float64(uint16_t a, float_status *status)
4090 return uint64_to_float64_scalbn(a, 0, status);
4093 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4095 FloatParts64 p;
4097 parts_uint_to_float(&p, a, scale, status);
4098 return bfloat16_round_pack_canonical(&p, status);
4101 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4103 return uint64_to_bfloat16_scalbn(a, scale, status);
4106 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4108 return uint64_to_bfloat16_scalbn(a, scale, status);
4111 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4113 return uint64_to_bfloat16_scalbn(a, 0, status);
4116 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4118 return uint64_to_bfloat16_scalbn(a, 0, status);
4121 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4123 return uint64_to_bfloat16_scalbn(a, 0, status);
4126 float128 uint64_to_float128(uint64_t a, float_status *status)
4128 FloatParts128 p;
4130 parts_uint_to_float(&p, a, 0, status);
4131 return float128_round_pack_canonical(&p, status);
4134 float128 uint128_to_float128(Int128 a, float_status *status)
4136 FloatParts128 p = { };
4137 int shift;
4139 if (int128_nz(a)) {
4140 p.cls = float_class_normal;
4142 shift = clz64(int128_gethi(a));
4143 if (shift == 64) {
4144 shift += clz64(int128_getlo(a));
4147 p.exp = 127 - shift;
4148 a = int128_lshift(a, shift);
4150 p.frac_hi = int128_gethi(a);
4151 p.frac_lo = int128_getlo(a);
4152 } else {
4153 p.cls = float_class_zero;
4156 return float128_round_pack_canonical(&p, status);
/*
 * Minimum and maximum
 */
4163 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4165 FloatParts64 pa, pb, *pr;
4167 float16_unpack_canonical(&pa, a, s);
4168 float16_unpack_canonical(&pb, b, s);
4169 pr = parts_minmax(&pa, &pb, s, flags);
4171 return float16_round_pack_canonical(pr, s);
4174 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4175 float_status *s, int flags)
4177 FloatParts64 pa, pb, *pr;
4179 bfloat16_unpack_canonical(&pa, a, s);
4180 bfloat16_unpack_canonical(&pb, b, s);
4181 pr = parts_minmax(&pa, &pb, s, flags);
4183 return bfloat16_round_pack_canonical(pr, s);
4186 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4188 FloatParts64 pa, pb, *pr;
4190 float32_unpack_canonical(&pa, a, s);
4191 float32_unpack_canonical(&pb, b, s);
4192 pr = parts_minmax(&pa, &pb, s, flags);
4194 return float32_round_pack_canonical(pr, s);
4197 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4199 FloatParts64 pa, pb, *pr;
4201 float64_unpack_canonical(&pa, a, s);
4202 float64_unpack_canonical(&pb, b, s);
4203 pr = parts_minmax(&pa, &pb, s, flags);
4205 return float64_round_pack_canonical(pr, s);
4208 static float128 float128_minmax(float128 a, float128 b,
4209 float_status *s, int flags)
4211 FloatParts128 pa, pb, *pr;
4213 float128_unpack_canonical(&pa, a, s);
4214 float128_unpack_canonical(&pb, b, s);
4215 pr = parts_minmax(&pa, &pb, s, flags);
4217 return float128_round_pack_canonical(pr, s);
4220 #define MINMAX_1(type, name, flags) \
4221 type type##_##name(type a, type b, float_status *s) \
4222 { return type##_minmax(a, b, s, flags); }
4224 #define MINMAX_2(type) \
4225 MINMAX_1(type, max, 0) \
4226 MINMAX_1(type, maxnum, minmax_isnum) \
4227 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
4228 MINMAX_1(type, maximum_number, minmax_isnumber) \
4229 MINMAX_1(type, min, minmax_ismin) \
4230 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
4231 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4232 MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \
4234 MINMAX_2(float16)
4235 MINMAX_2(bfloat16)
4236 MINMAX_2(float32)
4237 MINMAX_2(float64)
4238 MINMAX_2(float128)
4240 #undef MINMAX_1
4241 #undef MINMAX_2
/*
 * Floating point compare
 */
4247 static FloatRelation QEMU_FLATTEN
4248 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4250 FloatParts64 pa, pb;
4252 float16_unpack_canonical(&pa, a, s);
4253 float16_unpack_canonical(&pb, b, s);
4254 return parts_compare(&pa, &pb, s, is_quiet);
4257 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4259 return float16_do_compare(a, b, s, false);
4262 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4264 return float16_do_compare(a, b, s, true);
4267 static FloatRelation QEMU_SOFTFLOAT_ATTR
4268 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4270 FloatParts64 pa, pb;
4272 float32_unpack_canonical(&pa, a, s);
4273 float32_unpack_canonical(&pb, b, s);
4274 return parts_compare(&pa, &pb, s, is_quiet);
4277 static FloatRelation QEMU_FLATTEN
4278 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4280 union_float32 ua, ub;
4282 ua.s = xa;
4283 ub.s = xb;
4285 if (QEMU_NO_HARDFLOAT) {
4286 goto soft;
4289 float32_input_flush2(&ua.s, &ub.s, s);
4290 if (isgreaterequal(ua.h, ub.h)) {
4291 if (isgreater(ua.h, ub.h)) {
4292 return float_relation_greater;
4294 return float_relation_equal;
4296 if (likely(isless(ua.h, ub.h))) {
4297 return float_relation_less;
4300 * The only condition remaining is unordered.
4301 * Fall through to set flags.
4303 soft:
4304 return float32_do_compare(ua.s, ub.s, s, is_quiet);
4307 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4309 return float32_hs_compare(a, b, s, false);
4312 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4314 return float32_hs_compare(a, b, s, true);
4317 static FloatRelation QEMU_SOFTFLOAT_ATTR
4318 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4320 FloatParts64 pa, pb;
4322 float64_unpack_canonical(&pa, a, s);
4323 float64_unpack_canonical(&pb, b, s);
4324 return parts_compare(&pa, &pb, s, is_quiet);
4327 static FloatRelation QEMU_FLATTEN
4328 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4330 union_float64 ua, ub;
4332 ua.s = xa;
4333 ub.s = xb;
4335 if (QEMU_NO_HARDFLOAT) {
4336 goto soft;
4339 float64_input_flush2(&ua.s, &ub.s, s);
4340 if (isgreaterequal(ua.h, ub.h)) {
4341 if (isgreater(ua.h, ub.h)) {
4342 return float_relation_greater;
4344 return float_relation_equal;
4346 if (likely(isless(ua.h, ub.h))) {
4347 return float_relation_less;
4350 * The only condition remaining is unordered.
4351 * Fall through to set flags.
4353 soft:
4354 return float64_do_compare(ua.s, ub.s, s, is_quiet);
4357 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4359 return float64_hs_compare(a, b, s, false);
4362 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4364 return float64_hs_compare(a, b, s, true);
4367 static FloatRelation QEMU_FLATTEN
4368 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4370 FloatParts64 pa, pb;
4372 bfloat16_unpack_canonical(&pa, a, s);
4373 bfloat16_unpack_canonical(&pb, b, s);
4374 return parts_compare(&pa, &pb, s, is_quiet);
4377 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4379 return bfloat16_do_compare(a, b, s, false);
4382 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4384 return bfloat16_do_compare(a, b, s, true);
4387 static FloatRelation QEMU_FLATTEN
4388 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4390 FloatParts128 pa, pb;
4392 float128_unpack_canonical(&pa, a, s);
4393 float128_unpack_canonical(&pb, b, s);
4394 return parts_compare(&pa, &pb, s, is_quiet);
4397 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4399 return float128_do_compare(a, b, s, false);
4402 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4404 return float128_do_compare(a, b, s, true);
4407 static FloatRelation QEMU_FLATTEN
4408 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4410 FloatParts128 pa, pb;
4412 if (!floatx80_unpack_canonical(&pa, a, s) ||
4413 !floatx80_unpack_canonical(&pb, b, s)) {
4414 return float_relation_unordered;
4416 return parts_compare(&pa, &pb, s, is_quiet);
4419 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4421 return floatx80_do_compare(a, b, s, false);
4424 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4426 return floatx80_do_compare(a, b, s, true);
/*
 * Scale by 2**N
 */
4433 float16 float16_scalbn(float16 a, int n, float_status *status)
4435 FloatParts64 p;
4437 float16_unpack_canonical(&p, a, status);
4438 parts_scalbn(&p, n, status);
4439 return float16_round_pack_canonical(&p, status);
4442 float32 float32_scalbn(float32 a, int n, float_status *status)
4444 FloatParts64 p;
4446 float32_unpack_canonical(&p, a, status);
4447 parts_scalbn(&p, n, status);
4448 return float32_round_pack_canonical(&p, status);
4451 float64 float64_scalbn(float64 a, int n, float_status *status)
4453 FloatParts64 p;
4455 float64_unpack_canonical(&p, a, status);
4456 parts_scalbn(&p, n, status);
4457 return float64_round_pack_canonical(&p, status);
4460 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4462 FloatParts64 p;
4464 bfloat16_unpack_canonical(&p, a, status);
4465 parts_scalbn(&p, n, status);
4466 return bfloat16_round_pack_canonical(&p, status);
4469 float128 float128_scalbn(float128 a, int n, float_status *status)
4471 FloatParts128 p;
4473 float128_unpack_canonical(&p, a, status);
4474 parts_scalbn(&p, n, status);
4475 return float128_round_pack_canonical(&p, status);
4478 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4480 FloatParts128 p;
4482 if (!floatx80_unpack_canonical(&p, a, status)) {
4483 return floatx80_default_nan(status);
4485 parts_scalbn(&p, n, status);
4486 return floatx80_round_pack_canonical(&p, status);
/*
 * Square Root
 */
4493 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4495 FloatParts64 p;
4497 float16_unpack_canonical(&p, a, status);
4498 parts_sqrt(&p, status, &float16_params);
4499 return float16_round_pack_canonical(&p, status);
4502 static float32 QEMU_SOFTFLOAT_ATTR
4503 soft_f32_sqrt(float32 a, float_status *status)
4505 FloatParts64 p;
4507 float32_unpack_canonical(&p, a, status);
4508 parts_sqrt(&p, status, &float32_params);
4509 return float32_round_pack_canonical(&p, status);
4512 static float64 QEMU_SOFTFLOAT_ATTR
4513 soft_f64_sqrt(float64 a, float_status *status)
4515 FloatParts64 p;
4517 float64_unpack_canonical(&p, a, status);
4518 parts_sqrt(&p, status, &float64_params);
4519 return float64_round_pack_canonical(&p, status);
4522 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4524 union_float32 ua, ur;
4526 ua.s = xa;
4527 if (unlikely(!can_use_fpu(s))) {
4528 goto soft;
4531 float32_input_flush1(&ua.s, s);
4532 if (QEMU_HARDFLOAT_1F32_USE_FP) {
4533 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4534 fpclassify(ua.h) == FP_ZERO) ||
4535 signbit(ua.h))) {
4536 goto soft;
4538 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4539 float32_is_neg(ua.s))) {
4540 goto soft;
4542 ur.h = sqrtf(ua.h);
4543 return ur.s;
4545 soft:
4546 return soft_f32_sqrt(ua.s, s);
4549 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4551 union_float64 ua, ur;
4553 ua.s = xa;
4554 if (unlikely(!can_use_fpu(s))) {
4555 goto soft;
4558 float64_input_flush1(&ua.s, s);
4559 if (QEMU_HARDFLOAT_1F64_USE_FP) {
4560 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4561 fpclassify(ua.h) == FP_ZERO) ||
4562 signbit(ua.h))) {
4563 goto soft;
4565 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4566 float64_is_neg(ua.s))) {
4567 goto soft;
4569 ur.h = sqrt(ua.h);
4570 return ur.s;
4572 soft:
4573 return soft_f64_sqrt(ua.s, s);
4576 float64 float64r32_sqrt(float64 a, float_status *status)
4578 FloatParts64 p;
4580 float64_unpack_canonical(&p, a, status);
4581 parts_sqrt(&p, status, &float64_params);
4582 return float64r32_round_pack_canonical(&p, status);
4585 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4587 FloatParts64 p;
4589 bfloat16_unpack_canonical(&p, a, status);
4590 parts_sqrt(&p, status, &bfloat16_params);
4591 return bfloat16_round_pack_canonical(&p, status);
4594 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4596 FloatParts128 p;
4598 float128_unpack_canonical(&p, a, status);
4599 parts_sqrt(&p, status, &float128_params);
4600 return float128_round_pack_canonical(&p, status);
4603 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4605 FloatParts128 p;
4607 if (!floatx80_unpack_canonical(&p, a, s)) {
4608 return floatx80_default_nan(s);
4610 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4611 return floatx80_round_pack_canonical(&p, s);
/*
 * log2
 */
4617 float32 float32_log2(float32 a, float_status *status)
4619 FloatParts64 p;
4621 float32_unpack_canonical(&p, a, status);
4622 parts_log2(&p, status, &float32_params);
4623 return float32_round_pack_canonical(&p, status);
4626 float64 float64_log2(float64 a, float_status *status)
4628 FloatParts64 p;
4630 float64_unpack_canonical(&p, a, status);
4631 parts_log2(&p, status, &float64_params);
4632 return float64_round_pack_canonical(&p, status);
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/
4639 float16 float16_default_nan(float_status *status)
4641 FloatParts64 p;
4643 parts_default_nan(&p, status);
4644 p.frac >>= float16_params.frac_shift;
4645 return float16_pack_raw(&p);
4648 float32 float32_default_nan(float_status *status)
4650 FloatParts64 p;
4652 parts_default_nan(&p, status);
4653 p.frac >>= float32_params.frac_shift;
4654 return float32_pack_raw(&p);
4657 float64 float64_default_nan(float_status *status)
4659 FloatParts64 p;
4661 parts_default_nan(&p, status);
4662 p.frac >>= float64_params.frac_shift;
4663 return float64_pack_raw(&p);
4666 float128 float128_default_nan(float_status *status)
4668 FloatParts128 p;
4670 parts_default_nan(&p, status);
4671 frac_shr(&p, float128_params.frac_shift);
4672 return float128_pack_raw(&p);
4675 bfloat16 bfloat16_default_nan(float_status *status)
4677 FloatParts64 p;
4679 parts_default_nan(&p, status);
4680 p.frac >>= bfloat16_params.frac_shift;
4681 return bfloat16_pack_raw(&p);
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/
4688 float16 float16_silence_nan(float16 a, float_status *status)
4690 FloatParts64 p;
4692 float16_unpack_raw(&p, a);
4693 p.frac <<= float16_params.frac_shift;
4694 parts_silence_nan(&p, status);
4695 p.frac >>= float16_params.frac_shift;
4696 return float16_pack_raw(&p);
4699 float32 float32_silence_nan(float32 a, float_status *status)
4701 FloatParts64 p;
4703 float32_unpack_raw(&p, a);
4704 p.frac <<= float32_params.frac_shift;
4705 parts_silence_nan(&p, status);
4706 p.frac >>= float32_params.frac_shift;
4707 return float32_pack_raw(&p);
4710 float64 float64_silence_nan(float64 a, float_status *status)
4712 FloatParts64 p;
4714 float64_unpack_raw(&p, a);
4715 p.frac <<= float64_params.frac_shift;
4716 parts_silence_nan(&p, status);
4717 p.frac >>= float64_params.frac_shift;
4718 return float64_pack_raw(&p);
4721 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4723 FloatParts64 p;
4725 bfloat16_unpack_raw(&p, a);
4726 p.frac <<= bfloat16_params.frac_shift;
4727 parts_silence_nan(&p, status);
4728 p.frac >>= bfloat16_params.frac_shift;
4729 return bfloat16_pack_raw(&p);
4732 float128 float128_silence_nan(float128 a, float_status *status)
4734 FloatParts128 p;
4736 float128_unpack_raw(&p, a);
4737 frac_shl(&p, float128_params.frac_shift);
4738 parts_silence_nan(&p, status);
4739 frac_shr(&p, float128_params.frac_shift);
4740 return float128_pack_raw(&p);
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
4748 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4750 if (p.exp == 0 && p.frac != 0) {
4751 float_raise(float_flag_input_denormal, status);
4752 return true;
4755 return false;
4758 float16 float16_squash_input_denormal(float16 a, float_status *status)
4760 if (status->flush_inputs_to_zero) {
4761 FloatParts64 p;
4763 float16_unpack_raw(&p, a);
4764 if (parts_squash_denormal(p, status)) {
4765 return float16_set_sign(float16_zero, p.sign);
4768 return a;
4771 float32 float32_squash_input_denormal(float32 a, float_status *status)
4773 if (status->flush_inputs_to_zero) {
4774 FloatParts64 p;
4776 float32_unpack_raw(&p, a);
4777 if (parts_squash_denormal(p, status)) {
4778 return float32_set_sign(float32_zero, p.sign);
4781 return a;
4784 float64 float64_squash_input_denormal(float64 a, float_status *status)
4786 if (status->flush_inputs_to_zero) {
4787 FloatParts64 p;
4789 float64_unpack_raw(&p, a);
4790 if (parts_squash_denormal(p, status)) {
4791 return float64_set_sign(float64_zero, p.sign);
4794 return a;
4797 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4799 if (status->flush_inputs_to_zero) {
4800 FloatParts64 p;
4802 bfloat16_unpack_raw(&p, a);
4803 if (parts_squash_denormal(p, status)) {
4804 return bfloat16_set_sign(bfloat16_zero, p.sign);
4807 return a;
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
|
| The significand is shifted left by its count of leading zero bits and the
| exponent is set to 1 minus that count.
| NOTE(review): presumably `aSig' is never zero here; a leading-zero count
| equal to the width of the type would make the shift undefined - confirm
| against callers.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t leadingZeros = clz64(aSig);

    *zSigPtr = aSig << leadingZeros;
    *zExpPtr = 1 - leadingZeros;
}
4827 /*----------------------------------------------------------------------------
4828 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4829 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4830 | and returns the proper extended double-precision floating-point value
4831 | corresponding to the abstract input. Ordinarily, the abstract value is
4832 | rounded and packed into the extended double-precision format, with the
4833 | inexact exception raised if the abstract input cannot be represented
4834 | exactly. However, if the abstract value is too large, the overflow and
4835 | inexact exceptions are raised and an infinity or maximal finite value is
4836 | returned. If the abstract value is too small, the input value is rounded to
4837 | a subnormal number, and the underflow and inexact exceptions are raised if
4838 | the abstract input cannot be represented exactly as a subnormal extended
4839 | double-precision floating-point number.
4840 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4841 | the result is rounded to the same number of bits as single or double
4842 | precision, respectively. Otherwise, the result is rounded to the full
4843 | precision of the extended double-precision format.
4844 | The input significand must be normalized or smaller. If the input
4845 | significand is not normalized, `zExp' must be 0; in that case, the result
4846 | returned is a subnormal number, and it must not require rounding. The
4847 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4848 | Floating-Point Arithmetic.
4849 *----------------------------------------------------------------------------*/
4851 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4852 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4853 float_status *status)
4855 FloatRoundMode roundingMode;
4856 bool roundNearestEven, increment, isTiny;
4857 int64_t roundIncrement, roundMask, roundBits;
4859 roundingMode = status->float_rounding_mode;
4860 roundNearestEven = ( roundingMode == float_round_nearest_even );
4861 switch (roundingPrecision) {
4862 case floatx80_precision_x:
4863 goto precision80;
4864 case floatx80_precision_d:
4865 roundIncrement = UINT64_C(0x0000000000000400);
4866 roundMask = UINT64_C(0x00000000000007FF);
4867 break;
4868 case floatx80_precision_s:
4869 roundIncrement = UINT64_C(0x0000008000000000);
4870 roundMask = UINT64_C(0x000000FFFFFFFFFF);
4871 break;
4872 default:
4873 g_assert_not_reached();
4875 zSig0 |= ( zSig1 != 0 );
4876 switch (roundingMode) {
4877 case float_round_nearest_even:
4878 case float_round_ties_away:
4879 break;
4880 case float_round_to_zero:
4881 roundIncrement = 0;
4882 break;
4883 case float_round_up:
4884 roundIncrement = zSign ? 0 : roundMask;
4885 break;
4886 case float_round_down:
4887 roundIncrement = zSign ? roundMask : 0;
4888 break;
4889 default:
4890 abort();
4892 roundBits = zSig0 & roundMask;
4893 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4894 if ( ( 0x7FFE < zExp )
4895 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4897 goto overflow;
4899 if ( zExp <= 0 ) {
4900 if (status->flush_to_zero) {
4901 float_raise(float_flag_output_denormal, status);
4902 return packFloatx80(zSign, 0, 0);
4904 isTiny = status->tininess_before_rounding
4905 || (zExp < 0 )
4906 || (zSig0 <= zSig0 + roundIncrement);
4907 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4908 zExp = 0;
4909 roundBits = zSig0 & roundMask;
4910 if (isTiny && roundBits) {
4911 float_raise(float_flag_underflow, status);
4913 if (roundBits) {
4914 float_raise(float_flag_inexact, status);
4916 zSig0 += roundIncrement;
4917 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4918 roundIncrement = roundMask + 1;
4919 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4920 roundMask |= roundIncrement;
4922 zSig0 &= ~ roundMask;
4923 return packFloatx80( zSign, zExp, zSig0 );
4926 if (roundBits) {
4927 float_raise(float_flag_inexact, status);
4929 zSig0 += roundIncrement;
4930 if ( zSig0 < roundIncrement ) {
4931 ++zExp;
4932 zSig0 = UINT64_C(0x8000000000000000);
4934 roundIncrement = roundMask + 1;
4935 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4936 roundMask |= roundIncrement;
4938 zSig0 &= ~ roundMask;
4939 if ( zSig0 == 0 ) zExp = 0;
4940 return packFloatx80( zSign, zExp, zSig0 );
4941 precision80:
4942 switch (roundingMode) {
4943 case float_round_nearest_even:
4944 case float_round_ties_away:
4945 increment = ((int64_t)zSig1 < 0);
4946 break;
4947 case float_round_to_zero:
4948 increment = 0;
4949 break;
4950 case float_round_up:
4951 increment = !zSign && zSig1;
4952 break;
4953 case float_round_down:
4954 increment = zSign && zSig1;
4955 break;
4956 default:
4957 abort();
4959 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4960 if ( ( 0x7FFE < zExp )
4961 || ( ( zExp == 0x7FFE )
4962 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4963 && increment
4966 roundMask = 0;
4967 overflow:
4968 float_raise(float_flag_overflow | float_flag_inexact, status);
4969 if ( ( roundingMode == float_round_to_zero )
4970 || ( zSign && ( roundingMode == float_round_up ) )
4971 || ( ! zSign && ( roundingMode == float_round_down ) )
4973 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4975 return packFloatx80(zSign,
4976 floatx80_infinity_high,
4977 floatx80_infinity_low);
4979 if ( zExp <= 0 ) {
4980 isTiny = status->tininess_before_rounding
4981 || (zExp < 0)
4982 || !increment
4983 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4984 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4985 zExp = 0;
4986 if (isTiny && zSig1) {
4987 float_raise(float_flag_underflow, status);
4989 if (zSig1) {
4990 float_raise(float_flag_inexact, status);
4992 switch (roundingMode) {
4993 case float_round_nearest_even:
4994 case float_round_ties_away:
4995 increment = ((int64_t)zSig1 < 0);
4996 break;
4997 case float_round_to_zero:
4998 increment = 0;
4999 break;
5000 case float_round_up:
5001 increment = !zSign && zSig1;
5002 break;
5003 case float_round_down:
5004 increment = zSign && zSig1;
5005 break;
5006 default:
5007 abort();
5009 if ( increment ) {
5010 ++zSig0;
5011 if (!(zSig1 << 1) && roundNearestEven) {
5012 zSig0 &= ~1;
5014 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5016 return packFloatx80( zSign, zExp, zSig0 );
5019 if (zSig1) {
5020 float_raise(float_flag_inexact, status);
5022 if ( increment ) {
5023 ++zSig0;
5024 if ( zSig0 == 0 ) {
5025 ++zExp;
5026 zSig0 = UINT64_C(0x8000000000000000);
5028 else {
5029 if (!(zSig1 << 1) && roundNearestEven) {
5030 zSig0 &= ~1;
5034 else {
5035 if ( zSig0 == 0 ) zExp = 0;
5037 return packFloatx80( zSign, zExp, zSig0 );
5041 /*----------------------------------------------------------------------------
5042 | Takes an abstract floating-point value having sign `zSign', exponent
5043 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5044 | and returns the proper extended double-precision floating-point value
5045 | corresponding to the abstract input. This routine is just like
5046 | `roundAndPackFloatx80' except that the input significand does not have to be
5047 | normalized.
5048 *----------------------------------------------------------------------------*/
5050 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5051 bool zSign, int32_t zExp,
5052 uint64_t zSig0, uint64_t zSig1,
5053 float_status *status)
5055 int8_t shiftCount;
5057 if ( zSig0 == 0 ) {
5058 zSig0 = zSig1;
5059 zSig1 = 0;
5060 zExp -= 64;
5062 shiftCount = clz64(zSig0);
5063 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5064 zExp -= shiftCount;
5065 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5066 zSig0, zSig1, status);
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|              1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/
5088 static const float64 float32_exp2_coefficients[15] =
5090 const_float64( 0x3ff0000000000000ll ), /* 1 */
5091 const_float64( 0x3fe0000000000000ll ), /* 2 */
5092 const_float64( 0x3fc5555555555555ll ), /* 3 */
5093 const_float64( 0x3fa5555555555555ll ), /* 4 */
5094 const_float64( 0x3f81111111111111ll ), /* 5 */
5095 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5096 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5097 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5098 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5099 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5100 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5101 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5102 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5103 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5104 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5107 float32 float32_exp2(float32 a, float_status *status)
5109 FloatParts64 xp, xnp, tp, rp;
5110 int i;
5112 float32_unpack_canonical(&xp, a, status);
5113 if (unlikely(xp.cls != float_class_normal)) {
5114 switch (xp.cls) {
5115 case float_class_snan:
5116 case float_class_qnan:
5117 parts_return_nan(&xp, status);
5118 return float32_round_pack_canonical(&xp, status);
5119 case float_class_inf:
5120 return xp.sign ? float32_zero : a;
5121 case float_class_zero:
5122 return float32_one;
5123 default:
5124 break;
5126 g_assert_not_reached();
5129 float_raise(float_flag_inexact, status);
5131 float64_unpack_canonical(&tp, float64_ln2, status);
5132 xp = *parts_mul(&xp, &tp, status);
5133 xnp = xp;
5135 float64_unpack_canonical(&rp, float64_one, status);
5136 for (i = 0 ; i < 15 ; i++) {
5137 float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5138 rp = *parts_muladd(&tp, &xp, &rp, 0, status);
5139 xnp = *parts_mul(&xnp, &xp, status);
5142 return float32_round_pack_canonical(&rp, status);
5145 /*----------------------------------------------------------------------------
5146 | Rounds the extended double-precision floating-point value `a'
5147 | to the precision provided by floatx80_rounding_precision and returns the
5148 | result as an extended double-precision floating-point value.
5149 | The operation is performed according to the IEC/IEEE Standard for Binary
5150 | Floating-Point Arithmetic.
5151 *----------------------------------------------------------------------------*/
5153 floatx80 floatx80_round(floatx80 a, float_status *status)
5155 FloatParts128 p;
5157 if (!floatx80_unpack_canonical(&p, a, status)) {
5158 return floatx80_default_nan(status);
5160 return floatx80_round_pack_canonical(&p, status);
5163 static void __attribute__((constructor)) softfloat_init(void)
5165 union_float64 ua, ub, uc, ur;
5167 if (QEMU_NO_HARDFLOAT) {
5168 return;
5171 * Test that the host's FMA is not obviously broken. For example,
5172 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5173 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5175 ua.s = 0x0020000000000001ULL;
5176 ub.s = 0x3ca0000000000000ULL;
5177 uc.s = 0x0020000000000000ULL;
5178 ur.h = fma(ua.h, ub.h, uc.h);
5179 if (ur.s != 0x0020000000000001ULL) {
5180 force_soft_fma = true;