4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the SoftFloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
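 *
 * In outline, the generated fast path (an editorial summary of
 * float32_gen2()/float64_gen2() below, not additional code) is:
 *
 *   if (!can_use_fpu(s) || !pre(a, b))          -> take the softfloat path
 *   r = host_op(a, b)                            -> native FPU result
 *   if (isinf(r))                                -> raise float_flag_overflow
 *   else if (fabs(r) <= FLT_MIN && post(a, b))   -> recompute via softfloat
 *   return r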
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
215 # define QEMU_HARDFLOAT_USE_ISINF 0
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
235 static inline bool can_use_fpu(const float_status *s)
237 if (QEMU_NO_HARDFLOAT) {
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float (*hard_f32_op2_fn)(float a, float b);
272 typedef double (*hard_f64_op2_fn)(double a, double b);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
326 static inline bool f32_is_inf(union_float32 a)
328 if (QEMU_HARDFLOAT_USE_ISINF) {
331 return float32_is_infinity(a.s);
334 static inline bool f64_is_inf(union_float64 a)
336 if (QEMU_HARDFLOAT_USE_ISINF) {
339 return float64_is_infinity(a.s);
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345 f32_check_fn pre, f32_check_fn post)
347 union_float32 ua, ub, ur;
352 if (unlikely(!can_use_fpu(s))) {
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
363 float_raise(float_flag_overflow, s);
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
370 return soft(ua.s, ub.s, s);
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376 f64_check_fn pre, f64_check_fn post)
378 union_float64 ua, ub, ur;
383 if (unlikely(!can_use_fpu(s))) {
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
394 float_raise(float_flag_overflow, s);
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
401 return soft(ua.s, ub.s, s);
405 * Classify a floating point number. Everything above float_class_qnan
406 * is a NaN so cls >= float_class_qnan is any NaN.
409 typedef enum __attribute__ ((__packed__)) {
410 float_class_unclassified,
414 float_class_qnan, /* all NaNs from here */
418 #define float_cmask(bit) (1u << (bit))
421 float_cmask_zero = float_cmask(float_class_zero),
422 float_cmask_normal = float_cmask(float_class_normal),
423 float_cmask_inf = float_cmask(float_class_inf),
424 float_cmask_qnan = float_cmask(float_class_qnan),
425 float_cmask_snan = float_cmask(float_class_snan),
427 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
431 /* Flags for parts_minmax. */
433 /* Set for minimum; clear for maximum. */
435 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
437 /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
440 * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
446 /* Simple helpers for checking if, or what kind of, NaN we have */
447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
449 return unlikely(c >= float_class_qnan);
452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
454 return c == float_class_snan;
457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
459 return c == float_class_qnan;
463 * Structure holding all of the decomposed parts of a float.
464 * The exponent is unbiased and the fraction is normalized.
466 * The fraction words are stored in big-endian word ordering,
467 * so that truncation from a larger format to a smaller format
468 * can be done simply by ignoring subsequent elements.
476 /* Routines that know the structure may reference the singular name. */
479 * Routines expanded with multiple structures reference "hi" and "lo"
480 * depending on the operation. In FloatParts64, "hi" and "lo" are
481 * both the same word and aliased here.
501 uint64_t frac_hm; /* high-middle */
502 uint64_t frac_lm; /* low-middle */
506 /* These apply to the most significant word of each FloatPartsN. */
507 #define DECOMPOSED_BINARY_POINT 63
508 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
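/*
 * Worked example (editorial, assuming the canonical unpacking defined
 * below): the float32 value 1.5f unpacks to sign = 0, exp = 0 (unbiased)
 * and frac = 0xC000000000000000 -- the implicit bit sits at
 * DECOMPOSED_BINARY_POINT and the explicit 0.5 contribution just below it.
 */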
510 /* Structure holding all of the relevant parameters for a format.
511 * exp_size: the size of the exponent field
512 * exp_bias: the offset applied to the exponent field
513 * exp_max: the maximum normalised exponent
514 * frac_size: the size of the fraction field
515 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516 * The following are computed based the size of fraction
517 * round_mask: bits below lsb which must be rounded
518 * The following optional modifiers are available:
519 * arm_althp: handle ARM Alternative Half Precision
532 /* Expand fields based on the size of exponent and fraction */
533 #define FLOAT_PARAMS_(E) \
535 .exp_bias = ((1 << E) - 1) >> 1, \
536 .exp_re_bias = (1 << (E - 1)) + (1 << (E - 2)), \
537 .exp_max = (1 << E) - 1
539 #define FLOAT_PARAMS(E, F) \
542 .frac_shift = (-F - 1) & 63, \
543 .round_mask = (1ull << ((-F - 1) & 63)) - 1
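/*
 * Worked expansion (editorial): FLOAT_PARAMS(8, 23), used for float32
 * below, yields exp_bias = 127, exp_re_bias = 192, exp_max = 255,
 * frac_shift = (-23 - 1) & 63 = 40 and round_mask = (1ull << 40) - 1,
 * so the 23 fraction bits land directly below DECOMPOSED_BINARY_POINT
 * once shifted left by frac_shift.
 */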
545 static const FloatFmt float16_params = {
549 static const FloatFmt float16_params_ahp = {
554 static const FloatFmt bfloat16_params = {
558 static const FloatFmt float32_params = {
562 static const FloatFmt float64_params = {
566 static const FloatFmt float128_params = {
567 FLOAT_PARAMS(15, 112)
570 #define FLOATX80_PARAMS(R) \
572 .frac_size = R == 64 ? 63 : R, \
574 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
576 static const FloatFmt floatx80_params[3] = {
577 [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
578 [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
579 [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
582 /* Unpack a float to parts, but do not canonicalize. */
583 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
585 const int f_size = fmt->frac_size;
586 const int e_size = fmt->exp_size;
588 *r = (FloatParts64) {
589 .cls = float_class_unclassified,
590 .sign = extract64(raw, f_size + e_size, 1),
591 .exp = extract64(raw, f_size, e_size),
592 .frac = extract64(raw, 0, f_size)
596 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
598 unpack_raw64(p, &float16_params, f);
601 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
603 unpack_raw64(p, &bfloat16_params, f);
606 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
608 unpack_raw64(p, &float32_params, f);
611 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
613 unpack_raw64(p, &float64_params, f);
616 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
618 *p = (FloatParts128) {
619 .cls = float_class_unclassified,
620 .sign = extract32(f.high, 15, 1),
621 .exp = extract32(f.high, 0, 15),
626 static void float128_unpack_raw(FloatParts128 *p, float128 f)
628 const int f_size = float128_params.frac_size - 64;
629 const int e_size = float128_params.exp_size;
631 *p = (FloatParts128) {
632 .cls = float_class_unclassified,
633 .sign = extract64(f.high, f_size + e_size, 1),
634 .exp = extract64(f.high, f_size, e_size),
635 .frac_hi = extract64(f.high, 0, f_size),
640 /* Pack a float from parts, but do not canonicalize. */
641 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
643 const int f_size = fmt->frac_size;
644 const int e_size = fmt->exp_size;
647 ret = (uint64_t)p->sign << (f_size + e_size);
648 ret = deposit64(ret, f_size, e_size, p->exp);
649 ret = deposit64(ret, 0, f_size, p->frac);
653 static inline float16 float16_pack_raw(const FloatParts64 *p)
655 return make_float16(pack_raw64(p, &float16_params));
658 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
660 return pack_raw64(p, &bfloat16_params);
663 static inline float32 float32_pack_raw(const FloatParts64 *p)
665 return make_float32(pack_raw64(p, &float32_params));
668 static inline float64 float64_pack_raw(const FloatParts64 *p)
670 return make_float64(pack_raw64(p, &float64_params));
673 static float128 float128_pack_raw(const FloatParts128 *p)
675 const int f_size = float128_params.frac_size - 64;
676 const int e_size = float128_params.exp_size;
679 hi = (uint64_t)p->sign << (f_size + e_size);
680 hi = deposit64(hi, f_size, e_size, p->exp);
681 hi = deposit64(hi, 0, f_size, p->frac_hi);
682 return make_float128(hi, p->frac_lo);
685 /*----------------------------------------------------------------------------
686 | Functions and definitions to determine: (1) whether tininess for underflow
687 | is detected before or after rounding by default, (2) what (if anything)
688 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
689 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
690 | are propagated from function inputs to output. These details are target-
692 *----------------------------------------------------------------------------*/
693 #include "softfloat-specialize.c.inc"
695 #define PARTS_GENERIC_64_128(NAME, P) \
696 _Generic((P), FloatParts64 *: parts64_##NAME, \
697 FloatParts128 *: parts128_##NAME)
699 #define PARTS_GENERIC_64_128_256(NAME, P) \
700 _Generic((P), FloatParts64 *: parts64_##NAME, \
701 FloatParts128 *: parts128_##NAME, \
702 FloatParts256 *: parts256_##NAME)
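/*
 * Usage note (editorial): given FloatParts64 p and float_status *s,
 * parts_default_nan(&p, s) below resolves at compile time to
 * parts64_default_nan(&p, s) via _Generic; a FloatParts128 * argument
 * selects parts128_default_nan() instead, with no runtime dispatch.
 */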
704 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
705 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
707 static void parts64_return_nan(FloatParts64 *a, float_status *s);
708 static void parts128_return_nan(FloatParts128 *a, float_status *s);
710 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
712 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
714 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
717 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
719 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
720 FloatParts64
*c
, float_status
*s
,
721 int ab_mask
, int abc_mask
);
722 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
726 int ab_mask
, int abc_mask
);
728 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
729 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
731 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
732 const FloatFmt
*fmt
);
733 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
734 const FloatFmt
*fmt
);
736 #define parts_canonicalize(A, S, F) \
737 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
739 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
740 const FloatFmt
*fmt
);
741 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
742 const FloatFmt
*fmt
);
744 #define parts_uncanon_normal(A, S, F) \
745 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
747 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
748 const FloatFmt
*fmt
);
749 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
750 const FloatFmt
*fmt
);
752 #define parts_uncanon(A, S, F) \
753 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
755 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
756 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
757 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
759 #define parts_add_normal(A, B) \
760 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
762 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
763 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
764 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
766 #define parts_sub_normal(A, B) \
767 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
769 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
770 float_status
*s
, bool subtract
);
771 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
772 float_status
*s
, bool subtract
);
774 #define parts_addsub(A, B, S, Z) \
775 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
777 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
779 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
782 #define parts_mul(A, B, S) \
783 PARTS_GENERIC_64_128(mul, A)(A, B, S)
785 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
786 FloatParts64
*c
, int flags
,
788 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
789 FloatParts128
*c
, int flags
,
792 #define parts_muladd(A, B, C, Z, S) \
793 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
795 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
797 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
800 #define parts_div(A, B, S) \
801 PARTS_GENERIC_64_128(div, A)(A, B, S)
803 static FloatParts64
*parts64_modrem(FloatParts64
*a
, FloatParts64
*b
,
804 uint64_t *mod_quot
, float_status
*s
);
805 static FloatParts128
*parts128_modrem(FloatParts128
*a
, FloatParts128
*b
,
806 uint64_t *mod_quot
, float_status
*s
);
808 #define parts_modrem(A, B, Q, S) \
809 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
811 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
812 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
814 #define parts_sqrt(A, S, F) \
815 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
817 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
818 int scale
, int frac_size
);
819 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
820 int scale
, int frac_size
);
822 #define parts_round_to_int_normal(A, R, C, F) \
823 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
825 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
826 int scale
, float_status
*s
,
827 const FloatFmt
*fmt
);
828 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
829 int scale
, float_status
*s
,
830 const FloatFmt
*fmt
);
832 #define parts_round_to_int(A, R, C, S, F) \
833 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
835 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
836 int scale
, int64_t min
, int64_t max
,
838 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
839 int scale
, int64_t min
, int64_t max
,
842 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
843 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
845 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
846 int scale
, uint64_t max
,
848 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
849 int scale
, uint64_t max
,
852 #define parts_float_to_uint(P, R, Z, M, S) \
853 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
855 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
856 int scale
, float_status
*s
);
857 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
858 int scale
, float_status
*s
);
860 #define parts_sint_to_float(P, I, Z, S) \
861 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
863 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
864 int scale
, float_status
*s
);
865 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
866 int scale
, float_status
*s
);
868 #define parts_uint_to_float(P, I, Z, S) \
869 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
871 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
872 float_status
*s
, int flags
);
873 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
874 float_status
*s
, int flags
);
876 #define parts_minmax(A, B, S, F) \
877 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
879 static FloatRelation
parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
880 float_status
*s
, bool q
);
881 static FloatRelation
parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
882 float_status
*s
, bool q
);
884 #define parts_compare(A, B, S, Q) \
885 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
887 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
888 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
890 #define parts_scalbn(A, N, S) \
891 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
893 static void parts64_log2(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
894 static void parts128_log2(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
896 #define parts_log2(A, S, F) \
897 PARTS_GENERIC_64_128(log2, A)(A, S, F)
900 * Helper functions for softfloat-parts.c.inc, per-size operations.
903 #define FRAC_GENERIC_64_128(NAME, P) \
904 _Generic((P), FloatParts64 *: frac64_##NAME, \
905 FloatParts128 *: frac128_##NAME)
907 #define FRAC_GENERIC_64_128_256(NAME, P) \
908 _Generic((P), FloatParts64 *: frac64_##NAME, \
909 FloatParts128 *: frac128_##NAME, \
910 FloatParts256 *: frac256_##NAME)
912 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
914 return uadd64_overflow(a->frac, b->frac, &r->frac);
917 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
920 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
921 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
925 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
928 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
929 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
930 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
931 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
935 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
937 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
939 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
942 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
944 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
945 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
948 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
950 static void frac64_allones(FloatParts64
*a
)
955 static void frac128_allones(FloatParts128
*a
)
957 a
->frac_hi
= a
->frac_lo
= -1;
960 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
962 static FloatRelation
frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
964 return (a
->frac
== b
->frac
? float_relation_equal
965 : a
->frac
< b
->frac
? float_relation_less
966 : float_relation_greater
);
969 static FloatRelation
frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
971 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
973 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
975 return float_relation_equal
;
978 return ta
< tb
? float_relation_less
: float_relation_greater
;
981 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
983 static void frac64_clear(FloatParts64
*a
)
988 static void frac128_clear(FloatParts128
*a
)
990 a
->frac_hi
= a
->frac_lo
= 0;
993 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
995 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
997 uint64_t n1
, n0
, r
, q
;
1001 * We want a 2*N / N-bit division to produce exactly an N-bit
1002 * result, so that we do not lose any precision and so that we
1003 * do not have to renormalize afterward. If A.frac < B.frac,
1004 * then division would produce an (N-1)-bit result; shift A left
1005 * by one to produce an N-bit result, and return true to
1006 * decrement the exponent to match.
1008 * The udiv_qrnnd algorithm that we're using requires normalization,
1009 * i.e. the msb of the denominator must be set, which is already true.
1011 ret
= a
->frac
< b
->frac
;
1019 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1021 /* Set lsb if there is a remainder, to set inexact. */
1022 a
->frac
= q
| (r
!= 0);
1027 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1029 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1030 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1033 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1034 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1036 ret
= lt128(a0
, a1
, b0
, b1
);
1038 a1
= shr_double(a0
, a1
, 1);
1042 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1043 q0
= estimateDiv128To64(a0
, a1
, b0
);
1046 * Estimate is high because B1 was not included (unless B1 == 0).
1047 * Reduce quotient and increase remainder until remainder is non-negative.
1048 * This loop will execute 0 to 2 times.
1050 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1051 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1054 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1057 /* Repeat using the remainder, producing a second word of quotient. */
1058 q1
= estimateDiv128To64(r1
, r2
, b0
);
1059 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1060 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1063 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1066 /* Any remainder indicates inexact; set sticky bit. */
1067 q1
|= (r2
| r3
) != 0;
1074 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1076 static bool frac64_eqz(FloatParts64
*a
)
1078 return a
->frac
== 0;
1081 static bool frac128_eqz(FloatParts128
*a
)
1083 return (a
->frac_hi
| a
->frac_lo
) == 0;
1086 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1088 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1090 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1093 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1095 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1096 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1099 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1101 static void frac64_neg(FloatParts64
*a
)
1106 static void frac128_neg(FloatParts128
*a
)
1109 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1110 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1113 static void frac256_neg(FloatParts256
*a
)
1116 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1117 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1118 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1119 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1122 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1124 static int frac64_normalize(FloatParts64
*a
)
1127 int shift
= clz64(a
->frac
);
1134 static int frac128_normalize(FloatParts128
*a
)
1137 int shl
= clz64(a
->frac_hi
);
1138 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1141 } else if (a
->frac_lo
) {
1142 int shl
= clz64(a
->frac_lo
);
1143 a
->frac_hi
= a
->frac_lo
<< shl
;
1150 static int frac256_normalize(FloatParts256
*a
)
1152 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1153 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1165 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1168 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1171 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1174 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1184 a0
= shl_double(a0
, a1
, shl
);
1185 a1
= shl_double(a1
, a2
, shl
);
1186 a2
= shl_double(a2
, a3
, shl
);
1197 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1199 static void frac64_modrem(FloatParts64
*a
, FloatParts64
*b
, uint64_t *mod_quot
)
1201 uint64_t a0
, a1
, b0
, t0
, t1
, q
, quot
;
1202 int exp_diff
= a
->exp
- b
->exp
;
1208 if (exp_diff
< -1) {
1214 if (exp_diff
== -1) {
1220 quot
= q
= b0
<= a0
;
1226 while (exp_diff
> 0) {
1227 q
= estimateDiv128To64(a0
, a1
, b0
);
1228 q
= q
> 2 ? q
- 2 : 0;
1229 mul64To128(b0
, q
, &t0
, &t1
);
1230 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1231 shortShift128Left(a0
, a1
, 62, &a0
, &a1
);
1233 quot
= (quot
<< 62) + q
;
1238 q
= estimateDiv128To64(a0
, a1
, b0
);
1239 q
= q
> 2 ? (q
- 2) >> (64 - exp_diff
) : 0;
1240 mul64To128(b0
, q
<< (64 - exp_diff
), &t0
, &t1
);
1241 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1242 shortShift128Left(0, b0
, 64 - exp_diff
, &t0
, &t1
);
1243 while (le128(t0
, t1
, a0
, a1
)) {
1245 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1247 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1256 sub128(t0
, t1
, a0
, a1
, &t0
, &t1
);
1257 if (lt128(t0
, t1
, a0
, a1
) ||
1258 (eq128(t0
, t1
, a0
, a1
) && (q
& 1))) {
1267 shortShift128Left(a0
, a1
, shift
, &a0
, &a1
);
1268 } else if (likely(a1
)) {
1274 a
->cls
= float_class_zero
;
1278 a
->exp
= b
->exp
+ exp_diff
- shift
;
1279 a
->frac
= a0
| (a1
!= 0);
1282 static void frac128_modrem(FloatParts128
*a
, FloatParts128
*b
,
1285 uint64_t a0
, a1
, a2
, b0
, b1
, t0
, t1
, t2
, q
, quot
;
1286 int exp_diff
= a
->exp
- b
->exp
;
1293 if (exp_diff
< -1) {
1299 if (exp_diff
== -1) {
1300 shift128Right(a0
, a1
, 1, &a0
, &a1
);
1307 quot
= q
= le128(b0
, b1
, a0
, a1
);
1309 sub128(a0
, a1
, b0
, b1
, &a0
, &a1
);
1313 while (exp_diff
> 0) {
1314 q
= estimateDiv128To64(a0
, a1
, b0
);
1315 q
= q
> 4 ? q
- 4 : 0;
1316 mul128By64To192(b0
, b1
, q
, &t0
, &t1
, &t2
);
1317 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1318 shortShift192Left(a0
, a1
, a2
, 61, &a0
, &a1
, &a2
);
1320 quot
= (quot
<< 61) + q
;
1325 q
= estimateDiv128To64(a0
, a1
, b0
);
1326 q
= q
> 4 ? (q
- 4) >> (64 - exp_diff
) : 0;
1327 mul128By64To192(b0
, b1
, q
<< (64 - exp_diff
), &t0
, &t1
, &t2
);
1328 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1329 shortShift192Left(0, b0
, b1
, 64 - exp_diff
, &t0
, &t1
, &t2
);
1330 while (le192(t0
, t1
, t2
, a0
, a1
, a2
)) {
1332 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1334 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1344 sub192(t0
, t1
, t2
, a0
, a1
, a2
, &t0
, &t1
, &t2
);
1345 if (lt192(t0
, t1
, t2
, a0
, a1
, a2
) ||
1346 (eq192(t0
, t1
, t2
, a0
, a1
, a2
) && (q
& 1))) {
1356 shortShift192Left(a0
, a1
, a2
, shift
, &a0
, &a1
, &a2
);
1357 } else if (likely(a1
)) {
1359 shortShift128Left(a1
, a2
, shift
, &a0
, &a1
);
1362 } else if (likely(a2
)) {
1368 a
->cls
= float_class_zero
;
1372 a
->exp
= b
->exp
+ exp_diff
- shift
;
1374 a
->frac_lo
= a1
| (a2
!= 0);
1377 #define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1379 static void frac64_shl(FloatParts64
*a
, int c
)
1384 static void frac128_shl(FloatParts128
*a
, int c
)
1386 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1394 a0
= shl_double(a0
, a1
, c
);
1402 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1404 static void frac64_shr(FloatParts64
*a
, int c
)
1409 static void frac128_shr(FloatParts128
*a
, int c
)
1411 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1419 a1
= shr_double(a0
, a1
, c
);
1427 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1429 static void frac64_shrjam(FloatParts64
*a
, int c
)
1431 uint64_t a0
= a
->frac
;
1433 if (likely(c
!= 0)) {
1434 if (likely(c
< 64)) {
1435 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1443 static void frac128_shrjam(FloatParts128
*a
, int c
)
1445 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1446 uint64_t sticky
= 0;
1448 if (unlikely(c
== 0)) {
1450 } else if (likely(c
< 64)) {
1452 } else if (likely(c
< 128)) {
1466 sticky
|= shr_double(a1
, 0, c
);
1467 a1
= shr_double(a0
, a1
, c
);
1471 a
->frac_lo
= a1
| (sticky
!= 0);
1475 static void frac256_shrjam(FloatParts256
*a
, int c
)
1477 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1478 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1479 uint64_t sticky
= 0;
1481 if (unlikely(c
== 0)) {
1483 } else if (likely(c
< 64)) {
1485 } else if (likely(c
< 256)) {
1486 if (unlikely(c
& 128)) {
1488 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1490 if (unlikely(c
& 64)) {
1492 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1499 sticky
= a0
| a1
| a2
| a3
;
1500 a0
= a1
= a2
= a3
= 0;
1504 sticky
|= shr_double(a3
, 0, c
);
1505 a3
= shr_double(a2
, a3
, c
);
1506 a2
= shr_double(a1
, a2
, c
);
1507 a1
= shr_double(a0
, a1
, c
);
1511 a
->frac_lo
= a3
| (sticky
!= 0);
1517 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1519 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1521 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1524 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1527 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1528 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1532 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1535 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1536 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1537 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1538 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1542 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1544 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1546 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1549 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1551 r
->frac_hi
= a
->frac_hi
;
1552 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1555 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1557 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1559 r
->frac_hi
= a
->frac
;
1563 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1565 r
->frac_hi
= a
->frac_hi
;
1566 r
->frac_hm
= a
->frac_lo
;
1571 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1574 * Reciprocal sqrt table. 1 bit of exponent, 6 bits of mantissa.
1575 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1576 * and thus MIT licensed.
1578 static const uint16_t rsqrt_tab
[128] = {
1579 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1580 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1581 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1582 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1583 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1584 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1585 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1586 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1587 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1588 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1589 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1590 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1591 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1592 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1593 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1594 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1597 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1598 #define FloatPartsN glue(FloatParts,N)
1599 #define FloatPartsW glue(FloatParts,W)
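/*
 * Editorial note: in the full source, N and W are defined immediately
 * before each pair of includes below (64/128, then 128/256), so the
 * generic code in softfloat-parts*.c.inc is instantiated once per size;
 * e.g. with N = 64, partsN(mul) expands to parts64_mul() operating on
 * FloatPartsN = FloatParts64, with FloatPartsW = FloatParts128 as the
 * double-width type for products.
 */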
1604 #include "softfloat-parts-addsub.c.inc"
1605 #include "softfloat-parts.c.inc"
1612 #include "softfloat-parts-addsub.c.inc"
1613 #include "softfloat-parts.c.inc"
1619 #include "softfloat-parts-addsub.c.inc"
1628 * Pack/unpack routines with a specific FloatFmt.
1631 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1632 float_status
*s
, const FloatFmt
*params
)
1634 float16_unpack_raw(p
, f
);
1635 parts_canonicalize(p
, s
, params
);
1638 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1641 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1644 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1647 bfloat16_unpack_raw(p
, f
);
1648 parts_canonicalize(p
, s
, &bfloat16_params
);
1651 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1653 const FloatFmt
*params
)
1655 parts_uncanon(p
, s
, params
);
1656 return float16_pack_raw(p
);
1659 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1662 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1665 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1668 parts_uncanon(p
, s
, &bfloat16_params
);
1669 return bfloat16_pack_raw(p
);
1672 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1675 float32_unpack_raw(p
, f
);
1676 parts_canonicalize(p
, s
, &float32_params
);
1679 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1682 parts_uncanon(p
, s
, &float32_params
);
1683 return float32_pack_raw(p
);
1686 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1689 float64_unpack_raw(p
, f
);
1690 parts_canonicalize(p
, s
, &float64_params
);
1693 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1696 parts_uncanon(p
, s
, &float64_params
);
1697 return float64_pack_raw(p
);
1700 static float64
float64r32_round_pack_canonical(FloatParts64
*p
,
1703 parts_uncanon(p
, s
, &float32_params
);
1706 * In parts_uncanon, we placed the fraction for float32 at the lsb.
1707 * We need to adjust the fraction higher so that the least N bits are
1708 * zero, and the fraction is adjacent to the float64 implicit bit.
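 *
 * Concretely (editorial note), for the normal case this is a left shift
 * by float32_params.frac_shift - float64_params.frac_shift (40 - 11 = 29
 * bits) plus re-biasing the exponent by float64_params.exp_bias -
 * float32_params.exp_bias (1023 - 127 = 896), as done below.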
1711 case float_class_normal
:
1712 if (unlikely(p
->exp
== 0)) {
1714 * The result is denormal for float32, but can be represented
1715 * in normalized form for float64. Adjust, per canonicalize.
1717 int shift
= frac_normalize(p
);
1718 p
->exp
= (float32_params
.frac_shift
-
1719 float32_params
.exp_bias
- shift
+ 1 +
1720 float64_params
.exp_bias
);
1721 frac_shr(p
, float64_params
.frac_shift
);
1723 frac_shl(p
, float32_params
.frac_shift
- float64_params
.frac_shift
);
1724 p
->exp
+= float64_params
.exp_bias
- float32_params
.exp_bias
;
1727 case float_class_snan
:
1728 case float_class_qnan
:
1729 frac_shl(p
, float32_params
.frac_shift
- float64_params
.frac_shift
);
1730 p
->exp
= float64_params
.exp_max
;
1732 case float_class_inf
:
1733 p
->exp
= float64_params
.exp_max
;
1735 case float_class_zero
:
1738 g_assert_not_reached();
1741 return float64_pack_raw(p
);
1744 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1747 float128_unpack_raw(p
, f
);
1748 parts_canonicalize(p
, s
, &float128_params
);
1751 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1754 parts_uncanon(p
, s
, &float128_params
);
1755 return float128_pack_raw(p
);
1758 /* Returns false if the encoding is invalid. */
1759 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1762 /* Ensure rounding precision is set before beginning. */
1763 switch (s
->floatx80_rounding_precision
) {
1764 case floatx80_precision_x
:
1765 case floatx80_precision_d
:
1766 case floatx80_precision_s
:
1769 g_assert_not_reached();
1772 if (unlikely(floatx80_invalid_encoding(f
))) {
1773 float_raise(float_flag_invalid
, s
);
1777 floatx80_unpack_raw(p
, f
);
1779 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1780 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1782 /* The explicit integer bit is ignored, after invalid checks. */
1783 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1784 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1785 : parts_is_snan_frac(p
->frac_hi
, s
)
1786 ? float_class_snan
: float_class_qnan
);
1791 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1794 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1799 case float_class_normal
:
1800 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1801 parts_uncanon_normal(p
, s
, fmt
);
1809 frac_truncjam(&p64
, p
);
1810 parts_uncanon_normal(&p64
, s
, fmt
);
1814 if (exp
!= fmt
->exp_max
) {
1817 /* rounded to inf -- fall through to set frac correctly */
1819 case float_class_inf
:
1820 /* x86 and m68k differ in the setting of the integer bit. */
1821 frac
= floatx80_infinity_low
;
1825 case float_class_zero
:
1830 case float_class_snan
:
1831 case float_class_qnan
:
1832 /* NaNs have the integer bit set. */
1833 frac
= p
->frac_hi
| (1ull << 63);
1838 g_assert_not_reached();
1841 return packFloatx80(p
->sign
, exp
, frac
);
1845 * Addition and subtraction
1848 static float16 QEMU_FLATTEN
1849 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1851 FloatParts64 pa
, pb
, *pr
;
1853 float16_unpack_canonical(&pa
, a
, status
);
1854 float16_unpack_canonical(&pb
, b
, status
);
1855 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1857 return float16_round_pack_canonical(pr
, status
);
1860 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1862 return float16_addsub(a
, b
, status
, false);
1865 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1867 return float16_addsub(a
, b
, status
, true);
1870 static float32 QEMU_SOFTFLOAT_ATTR
1871 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1873 FloatParts64 pa
, pb
, *pr
;
1875 float32_unpack_canonical(&pa
, a
, status
);
1876 float32_unpack_canonical(&pb
, b
, status
);
1877 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1879 return float32_round_pack_canonical(pr
, status
);
1882 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1884 return soft_f32_addsub(a
, b
, status
, false);
1887 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1889 return soft_f32_addsub(a
, b
, status
, true);
1892 static float64 QEMU_SOFTFLOAT_ATTR
1893 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1895 FloatParts64 pa
, pb
, *pr
;
1897 float64_unpack_canonical(&pa
, a
, status
);
1898 float64_unpack_canonical(&pb
, b
, status
);
1899 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1901 return float64_round_pack_canonical(pr
, status
);
1904 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1906 return soft_f64_addsub(a
, b
, status
, false);
1909 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1911 return soft_f64_addsub(a
, b
, status
, true);
1914 static float hard_f32_add(float a
, float b
)
1919 static float hard_f32_sub(float a
, float b
)
1924 static double hard_f64_add(double a
, double b
)
1929 static double hard_f64_sub(double a
, double b
)
1934 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1936 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1937 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1939 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1942 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1944 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1945 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1947 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1951 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1952 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1954 return float32_gen2(a
, b
, s
, hard
, soft
,
1955 f32_is_zon2
, f32_addsubmul_post
);
1958 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1959 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1961 return float64_gen2(a
, b
, s
, hard
, soft
,
1962 f64_is_zon2
, f64_addsubmul_post
);
1965 float32 QEMU_FLATTEN
1966 float32_add(float32 a
, float32 b
, float_status
*s
)
1968 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1971 float32 QEMU_FLATTEN
1972 float32_sub(float32 a
, float32 b
, float_status
*s
)
1974 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1977 float64 QEMU_FLATTEN
1978 float64_add(float64 a
, float64 b
, float_status
*s
)
1980 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1983 float64 QEMU_FLATTEN
1984 float64_sub(float64 a
, float64 b
, float_status
*s
)
1986 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1989 static float64
float64r32_addsub(float64 a
, float64 b
, float_status
*status
,
1992 FloatParts64 pa
, pb
, *pr
;
1994 float64_unpack_canonical(&pa
, a
, status
);
1995 float64_unpack_canonical(&pb
, b
, status
);
1996 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1998 return float64r32_round_pack_canonical(pr
, status
);
2001 float64
float64r32_add(float64 a
, float64 b
, float_status
*status
)
2003 return float64r32_addsub(a
, b
, status
, false);
2006 float64
float64r32_sub(float64 a
, float64 b
, float_status
*status
)
2008 return float64r32_addsub(a
, b
, status
, true);
2011 static bfloat16 QEMU_FLATTEN
2012 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
2014 FloatParts64 pa
, pb
, *pr
;
2016 bfloat16_unpack_canonical(&pa
, a
, status
);
2017 bfloat16_unpack_canonical(&pb
, b
, status
);
2018 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2020 return bfloat16_round_pack_canonical(pr
, status
);
2023 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
2025 return bfloat16_addsub(a
, b
, status
, false);
2028 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
2030 return bfloat16_addsub(a
, b
, status
, true);
2033 static float128 QEMU_FLATTEN
2034 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
2036 FloatParts128 pa
, pb
, *pr
;
2038 float128_unpack_canonical(&pa
, a
, status
);
2039 float128_unpack_canonical(&pb
, b
, status
);
2040 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2042 return float128_round_pack_canonical(pr
, status
);
2045 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
2047 return float128_addsub(a
, b
, status
, false);
2050 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
2052 return float128_addsub(a
, b
, status
, true);
2055 static floatx80 QEMU_FLATTEN
2056 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
2058 FloatParts128 pa
, pb
, *pr
;
2060 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2061 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2062 return floatx80_default_nan(status
);
2065 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2066 return floatx80_round_pack_canonical(pr
, status
);
2069 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
2071 return floatx80_addsub(a
, b
, status
, false);
2074 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
2076 return floatx80_addsub(a
, b
, status
, true);
2083 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
2085 FloatParts64 pa
, pb
, *pr
;
2087 float16_unpack_canonical(&pa
, a
, status
);
2088 float16_unpack_canonical(&pb
, b
, status
);
2089 pr
= parts_mul(&pa
, &pb
, status
);
2091 return float16_round_pack_canonical(pr
, status
);
2094 static float32 QEMU_SOFTFLOAT_ATTR
2095 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
2097 FloatParts64 pa
, pb
, *pr
;
2099 float32_unpack_canonical(&pa
, a
, status
);
2100 float32_unpack_canonical(&pb
, b
, status
);
2101 pr
= parts_mul(&pa
, &pb
, status
);
2103 return float32_round_pack_canonical(pr
, status
);
2106 static float64 QEMU_SOFTFLOAT_ATTR
2107 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
2109 FloatParts64 pa
, pb
, *pr
;
2111 float64_unpack_canonical(&pa
, a
, status
);
2112 float64_unpack_canonical(&pb
, b
, status
);
2113 pr
= parts_mul(&pa
, &pb
, status
);
2115 return float64_round_pack_canonical(pr
, status
);
2118 static float hard_f32_mul(float a
, float b
)
2123 static double hard_f64_mul(double a
, double b
)
2128 float32 QEMU_FLATTEN
2129 float32_mul(float32 a
, float32 b
, float_status
*s
)
2131 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
2132 f32_is_zon2
, f32_addsubmul_post
);
2135 float64 QEMU_FLATTEN
2136 float64_mul(float64 a
, float64 b
, float_status
*s
)
2138 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
2139 f64_is_zon2
, f64_addsubmul_post
);
2142 float64
float64r32_mul(float64 a
, float64 b
, float_status
*status
)
2144 FloatParts64 pa
, pb
, *pr
;
2146 float64_unpack_canonical(&pa
, a
, status
);
2147 float64_unpack_canonical(&pb
, b
, status
);
2148 pr
= parts_mul(&pa
, &pb
, status
);
2150 return float64r32_round_pack_canonical(pr
, status
);
2153 bfloat16 QEMU_FLATTEN
2154 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
2156 FloatParts64 pa
, pb
, *pr
;
2158 bfloat16_unpack_canonical(&pa
, a
, status
);
2159 bfloat16_unpack_canonical(&pb
, b
, status
);
2160 pr
= parts_mul(&pa
, &pb
, status
);
2162 return bfloat16_round_pack_canonical(pr
, status
);
2165 float128 QEMU_FLATTEN
2166 float128_mul(float128 a
, float128 b
, float_status
*status
)
2168 FloatParts128 pa
, pb
, *pr
;
2170 float128_unpack_canonical(&pa
, a
, status
);
2171 float128_unpack_canonical(&pb
, b
, status
);
2172 pr
= parts_mul(&pa
, &pb
, status
);
2174 return float128_round_pack_canonical(pr
, status
);
2177 floatx80 QEMU_FLATTEN
2178 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
2180 FloatParts128 pa
, pb
, *pr
;
2182 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2183 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2184 return floatx80_default_nan(status
);
2187 pr
= parts_mul(&pa
, &pb
, status
);
2188 return floatx80_round_pack_canonical(pr
, status
);
/*
 * Fused multiply-add
 */

float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}

static bool force_soft_fma;

float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}

float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}

float64 float64r32_muladd(float64 a, float64 b, float64 c,
                          int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64r32_round_pack_canonical(pr, status);
}

bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                      int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
                                      int flags, float_status *status)
{
    FloatParts128 pa, pb, pc, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float128_round_pack_canonical(pr, status);
}
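/*
 * Editor's note (illustrative sketch, not part of the original source):
 * the float_muladd_* flags used above select negated variants of the fused
 * operation.  For example, an operation that computes -(a * b) + c could be
 * requested as
 *
 *     float32_muladd(a, b, c, float_muladd_negate_product, s);
 *
 * while float_muladd_negate_c negates the addend before the fma, and
 * float_muladd_negate_result negates the final result (applied via
 * float32_chs()/float64_chs() on the hardfloat path shown above).
 */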
/*
 * Division
 */

float16 float16_div(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

static float hard_f32_div(float a, float b)
{
    return a / b;
}

static double hard_f64_div(double a, double b)
{
    return a / b;
}

static bool f32_div_pre(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
}

static bool f64_div_pre(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
}

static bool f32_div_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float32_is_zero(a.s);
}

static bool f64_div_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float64_is_zero(a.s);
}

float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}

float64 float64r32_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64r32_round_pack_canonical(pr, status);
}

bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_div(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}

floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_div(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}
/*
 * Remainder
 */

float32 float32_rem(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float32_round_pack_canonical(pr, status);
}

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float64_round_pack_canonical(pr, status);
}

float128 float128_rem(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float128_round_pack_canonical(pr, status);
}

/*
 * Returns the remainder of the extended double-precision floating-point value
 * `a' with respect to the corresponding value `b'.
 * If 'mod' is false, the operation is performed according to the IEC/IEEE
 * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
 * the remainder based on truncating the quotient toward zero instead and
 * *quotient is set to the low 64 bits of the absolute value of the integer
 * quotient.
 */
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
                         uint64_t *quotient, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    *quotient = 0;
    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }
    pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);

    return floatx80_round_pack_canonical(pr, status);
}

floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, false, &quotient, status);
}

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, true, &quotient, status);
}
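/*
 * Editor's note (illustrative, not part of the original source): the two
 * wrappers above differ only in how the quotient is rounded.  For a = 5.0
 * and b = 3.0, the IEEE remainder (floatx80_rem) rounds the quotient 5/3 to
 * the nearest integer 2 and returns 5 - 2*3 = -1, while the truncating
 * variant (floatx80_mod) uses quotient 1 and returns 5 - 1*3 = 2.
 */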
/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */

static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid_snan, s);
        /* fall through */
    case float_class_qnan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}

static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)

static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}

static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2730 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2732 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2735 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2736 parts_float_to_float(&p
, s
);
2737 return float32_round_pack_canonical(&p
, s
);
2740 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2742 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2745 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2746 parts_float_to_float(&p
, s
);
2747 return float64_round_pack_canonical(&p
, s
);
2750 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2753 const FloatFmt
*fmt
;
2755 float32_unpack_canonical(&p
, a
, s
);
2757 parts_float_to_float(&p
, s
);
2758 fmt
= &float16_params
;
2760 parts_float_to_ahp(&p
, s
);
2761 fmt
= &float16_params_ahp
;
2763 return float16a_round_pack_canonical(&p
, s
, fmt
);
2766 static float64 QEMU_SOFTFLOAT_ATTR
2767 soft_float32_to_float64(float32 a
, float_status
*s
)
2771 float32_unpack_canonical(&p
, a
, s
);
2772 parts_float_to_float(&p
, s
);
2773 return float64_round_pack_canonical(&p
, s
);
2776 float64
float32_to_float64(float32 a
, float_status
*s
)
2778 if (likely(float32_is_normal(a
))) {
2779 /* Widening conversion can never produce inexact results. */
2785 } else if (float32_is_zero(a
)) {
2786 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2788 return soft_float32_to_float64(a
, s
);
2792 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2795 const FloatFmt
*fmt
;
2797 float64_unpack_canonical(&p
, a
, s
);
2799 parts_float_to_float(&p
, s
);
2800 fmt
= &float16_params
;
2802 parts_float_to_ahp(&p
, s
);
2803 fmt
= &float16_params_ahp
;
2805 return float16a_round_pack_canonical(&p
, s
, fmt
);
2808 float32
float64_to_float32(float64 a
, float_status
*s
)
2812 float64_unpack_canonical(&p
, a
, s
);
2813 parts_float_to_float(&p
, s
);
2814 return float32_round_pack_canonical(&p
, s
);
2817 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2821 bfloat16_unpack_canonical(&p
, a
, s
);
2822 parts_float_to_float(&p
, s
);
2823 return float32_round_pack_canonical(&p
, s
);
2826 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2830 bfloat16_unpack_canonical(&p
, a
, s
);
2831 parts_float_to_float(&p
, s
);
2832 return float64_round_pack_canonical(&p
, s
);
2835 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2839 float32_unpack_canonical(&p
, a
, s
);
2840 parts_float_to_float(&p
, s
);
2841 return bfloat16_round_pack_canonical(&p
, s
);
2844 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2848 float64_unpack_canonical(&p
, a
, s
);
2849 parts_float_to_float(&p
, s
);
2850 return bfloat16_round_pack_canonical(&p
, s
);
2853 float32
float128_to_float32(float128 a
, float_status
*s
)
2858 float128_unpack_canonical(&p128
, a
, s
);
2859 parts_float_to_float_narrow(&p64
, &p128
, s
);
2860 return float32_round_pack_canonical(&p64
, s
);
2863 float64
float128_to_float64(float128 a
, float_status
*s
)
2868 float128_unpack_canonical(&p128
, a
, s
);
2869 parts_float_to_float_narrow(&p64
, &p128
, s
);
2870 return float64_round_pack_canonical(&p64
, s
);
2873 float128
float32_to_float128(float32 a
, float_status
*s
)
2878 float32_unpack_canonical(&p64
, a
, s
);
2879 parts_float_to_float_widen(&p128
, &p64
, s
);
2880 return float128_round_pack_canonical(&p128
, s
);
2883 float128
float64_to_float128(float64 a
, float_status
*s
)
2888 float64_unpack_canonical(&p64
, a
, s
);
2889 parts_float_to_float_widen(&p128
, &p64
, s
);
2890 return float128_round_pack_canonical(&p128
, s
);
2893 float32
floatx80_to_float32(floatx80 a
, float_status
*s
)
2898 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2899 parts_float_to_float_narrow(&p64
, &p128
, s
);
2901 parts_default_nan(&p64
, s
);
2903 return float32_round_pack_canonical(&p64
, s
);
2906 float64
floatx80_to_float64(floatx80 a
, float_status
*s
)
2911 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2912 parts_float_to_float_narrow(&p64
, &p128
, s
);
2914 parts_default_nan(&p64
, s
);
2916 return float64_round_pack_canonical(&p64
, s
);
2919 float128
floatx80_to_float128(floatx80 a
, float_status
*s
)
2923 if (floatx80_unpack_canonical(&p
, a
, s
)) {
2924 parts_float_to_float(&p
, s
);
2926 parts_default_nan(&p
, s
);
2928 return float128_round_pack_canonical(&p
, s
);
2931 floatx80
float32_to_floatx80(float32 a
, float_status
*s
)
2936 float32_unpack_canonical(&p64
, a
, s
);
2937 parts_float_to_float_widen(&p128
, &p64
, s
);
2938 return floatx80_round_pack_canonical(&p128
, s
);
2941 floatx80
float64_to_floatx80(float64 a
, float_status
*s
)
2946 float64_unpack_canonical(&p64
, a
, s
);
2947 parts_float_to_float_widen(&p128
, &p64
, s
);
2948 return floatx80_round_pack_canonical(&p128
, s
);
2951 floatx80
float128_to_floatx80(float128 a
, float_status
*s
)
2955 float128_unpack_canonical(&p
, a
, s
);
2956 parts_float_to_float(&p
, s
);
2957 return floatx80_round_pack_canonical(&p
, s
);
/*
 * Round to integral value
 */

float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}

floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }

    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
                       &floatx80_params[status->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, status);
}
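/*
 * Editor's note (illustrative, not part of the original source): these
 * helpers round to an integral value in the same floating-point format,
 * honouring status->float_rounding_mode.  Under float_round_nearest_even,
 * for example, 2.5 rounds to 2.0 and 3.5 rounds to 4.0 (ties to even),
 * while float_round_to_zero would give 2.0 and 3.0.
 */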
3023 * Floating-point to signed integer conversions
3026 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3031 float16_unpack_canonical(&p
, a
, s
);
3032 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
3035 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3040 float16_unpack_canonical(&p
, a
, s
);
3041 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3044 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3049 float16_unpack_canonical(&p
, a
, s
);
3050 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3053 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3058 float16_unpack_canonical(&p
, a
, s
);
3059 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3062 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3067 float32_unpack_canonical(&p
, a
, s
);
3068 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3071 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3076 float32_unpack_canonical(&p
, a
, s
);
3077 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3080 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3085 float32_unpack_canonical(&p
, a
, s
);
3086 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3089 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3094 float64_unpack_canonical(&p
, a
, s
);
3095 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3098 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3103 float64_unpack_canonical(&p
, a
, s
);
3104 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3107 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3112 float64_unpack_canonical(&p
, a
, s
);
3113 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3116 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3121 bfloat16_unpack_canonical(&p
, a
, s
);
3122 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3125 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3130 bfloat16_unpack_canonical(&p
, a
, s
);
3131 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3134 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3139 bfloat16_unpack_canonical(&p
, a
, s
);
3140 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3143 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
3144 int scale
, float_status
*s
)
3148 float128_unpack_canonical(&p
, a
, s
);
3149 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3152 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
3153 int scale
, float_status
*s
)
3157 float128_unpack_canonical(&p
, a
, s
);
3158 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3161 static Int128
float128_to_int128_scalbn(float128 a
, FloatRoundMode rmode
,
3162 int scale
, float_status
*s
)
3168 float128_unpack_canonical(&p
, a
, s
);
3171 case float_class_snan
:
3172 flags
|= float_flag_invalid_snan
;
3174 case float_class_qnan
:
3175 flags
|= float_flag_invalid
;
3179 case float_class_inf
:
3180 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3181 r
= p
.sign
? INT128_MIN
: INT128_MAX
;
3184 case float_class_zero
:
3185 return int128_zero();
3187 case float_class_normal
:
3188 if (parts_round_to_int_normal(&p
, rmode
, scale
, 128 - 2)) {
3189 flags
= float_flag_inexact
;
3193 int shift
= 127 - p
.exp
;
3194 r
= int128_urshift(int128_make128(p
.frac_lo
, p
.frac_hi
), shift
);
3198 } else if (p
.exp
== 127 && p
.sign
&& p
.frac_lo
== 0 &&
3199 p
.frac_hi
== DECOMPOSED_IMPLICIT_BIT
) {
3202 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3203 r
= p
.sign
? INT128_MIN
: INT128_MAX
;
3208 g_assert_not_reached();
3211 float_raise(flags
, s
);
3215 static int32_t floatx80_to_int32_scalbn(floatx80 a
, FloatRoundMode rmode
,
3216 int scale
, float_status
*s
)
3220 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3221 parts_default_nan(&p
, s
);
3223 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3226 static int64_t floatx80_to_int64_scalbn(floatx80 a
, FloatRoundMode rmode
,
3227 int scale
, float_status
*s
)
3231 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3232 parts_default_nan(&p
, s
);
3234 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3237 int8_t float16_to_int8(float16 a
, float_status
*s
)
3239 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3242 int16_t float16_to_int16(float16 a
, float_status
*s
)
3244 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3247 int32_t float16_to_int32(float16 a
, float_status
*s
)
3249 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3252 int64_t float16_to_int64(float16 a
, float_status
*s
)
3254 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3257 int16_t float32_to_int16(float32 a
, float_status
*s
)
3259 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3262 int32_t float32_to_int32(float32 a
, float_status
*s
)
3264 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3267 int64_t float32_to_int64(float32 a
, float_status
*s
)
3269 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3272 int16_t float64_to_int16(float64 a
, float_status
*s
)
3274 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3277 int32_t float64_to_int32(float64 a
, float_status
*s
)
3279 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3282 int64_t float64_to_int64(float64 a
, float_status
*s
)
3284 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3287 int32_t float128_to_int32(float128 a
, float_status
*s
)
3289 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3292 int64_t float128_to_int64(float128 a
, float_status
*s
)
3294 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3297 Int128
float128_to_int128(float128 a
, float_status
*s
)
3299 return float128_to_int128_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3302 int32_t floatx80_to_int32(floatx80 a
, float_status
*s
)
3304 return floatx80_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3307 int64_t floatx80_to_int64(floatx80 a
, float_status
*s
)
3309 return floatx80_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3312 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
3314 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3317 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
3319 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3322 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
3324 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3327 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
3329 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3332 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
3334 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3337 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
3339 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3342 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
3344 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3347 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
3349 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3352 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
3354 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3357 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
3359 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3362 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
3364 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3367 Int128
float128_to_int128_round_to_zero(float128 a
, float_status
*s
)
3369 return float128_to_int128_scalbn(a
, float_round_to_zero
, 0, s
);
3372 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*s
)
3374 return floatx80_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3377 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*s
)
3379 return floatx80_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3382 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
3384 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3387 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
3389 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3392 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
3394 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3397 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
3399 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3402 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
3404 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3407 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
3409 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3413 * Floating-point to unsigned integer conversions
3416 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3421 float16_unpack_canonical(&p
, a
, s
);
3422 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
3425 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3430 float16_unpack_canonical(&p
, a
, s
);
3431 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3434 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3439 float16_unpack_canonical(&p
, a
, s
);
3440 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3443 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3448 float16_unpack_canonical(&p
, a
, s
);
3449 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3452 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3457 float32_unpack_canonical(&p
, a
, s
);
3458 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3461 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3466 float32_unpack_canonical(&p
, a
, s
);
3467 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3470 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3475 float32_unpack_canonical(&p
, a
, s
);
3476 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3479 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3484 float64_unpack_canonical(&p
, a
, s
);
3485 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3488 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3493 float64_unpack_canonical(&p
, a
, s
);
3494 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3497 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3502 float64_unpack_canonical(&p
, a
, s
);
3503 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3506 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3507 int scale
, float_status
*s
)
3511 bfloat16_unpack_canonical(&p
, a
, s
);
3512 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3515 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3516 int scale
, float_status
*s
)
3520 bfloat16_unpack_canonical(&p
, a
, s
);
3521 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3524 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3525 int scale
, float_status
*s
)
3529 bfloat16_unpack_canonical(&p
, a
, s
);
3530 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3533 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3534 int scale
, float_status
*s
)
3538 float128_unpack_canonical(&p
, a
, s
);
3539 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3542 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3543 int scale
, float_status
*s
)
3547 float128_unpack_canonical(&p
, a
, s
);
3548 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3551 static Int128
float128_to_uint128_scalbn(float128 a
, FloatRoundMode rmode
,
3552 int scale
, float_status
*s
)
3558 float128_unpack_canonical(&p
, a
, s
);
3561 case float_class_snan
:
3562 flags
|= float_flag_invalid_snan
;
3564 case float_class_qnan
:
3565 flags
|= float_flag_invalid
;
3569 case float_class_inf
:
3570 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3571 r
= p
.sign
? int128_zero() : UINT128_MAX
;
3574 case float_class_zero
:
3575 return int128_zero();
3577 case float_class_normal
:
3578 if (parts_round_to_int_normal(&p
, rmode
, scale
, 128 - 2)) {
3579 flags
= float_flag_inexact
;
3580 if (p
.cls
== float_class_zero
) {
3587 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3589 } else if (p
.exp
<= 127) {
3590 int shift
= 127 - p
.exp
;
3591 r
= int128_urshift(int128_make128(p
.frac_lo
, p
.frac_hi
), shift
);
3593 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3599 g_assert_not_reached();
3602 float_raise(flags
, s
);
3606 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3608 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3611 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3613 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3616 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3618 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3621 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3623 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3626 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3628 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3631 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3633 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3636 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3638 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3641 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3643 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3646 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3648 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3651 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3653 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3656 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3658 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3661 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3663 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3666 Int128
float128_to_uint128(float128 a
, float_status
*s
)
3668 return float128_to_uint128_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3671 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3673 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3676 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3678 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3681 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3683 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3686 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3688 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3691 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3693 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3696 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3698 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3701 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3703 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3706 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3708 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3711 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3713 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3716 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3718 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3721 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3723 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3726 Int128
float128_to_uint128_round_to_zero(float128 a
, float_status
*s
)
3728 return float128_to_uint128_scalbn(a
, float_round_to_zero
, 0, s
);
3731 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3733 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3736 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3738 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3741 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3743 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3746 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3748 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3751 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3753 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3756 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3758 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3762 * Signed integer to floating-point conversions
3765 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3769 parts_sint_to_float(&p
, a
, scale
, status
);
3770 return float16_round_pack_canonical(&p
, status
);
3773 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3775 return int64_to_float16_scalbn(a
, scale
, status
);
3778 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3780 return int64_to_float16_scalbn(a
, scale
, status
);
3783 float16
int64_to_float16(int64_t a
, float_status
*status
)
3785 return int64_to_float16_scalbn(a
, 0, status
);
3788 float16
int32_to_float16(int32_t a
, float_status
*status
)
3790 return int64_to_float16_scalbn(a
, 0, status
);
3793 float16
int16_to_float16(int16_t a
, float_status
*status
)
3795 return int64_to_float16_scalbn(a
, 0, status
);
3798 float16
int8_to_float16(int8_t a
, float_status
*status
)
3800 return int64_to_float16_scalbn(a
, 0, status
);
3803 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3807 /* Without scaling, there are no overflow concerns. */
3808 if (likely(scale
== 0) && can_use_fpu(status
)) {
3814 parts64_sint_to_float(&p
, a
, scale
, status
);
3815 return float32_round_pack_canonical(&p
, status
);
3818 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3820 return int64_to_float32_scalbn(a
, scale
, status
);
3823 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3825 return int64_to_float32_scalbn(a
, scale
, status
);
3828 float32
int64_to_float32(int64_t a
, float_status
*status
)
3830 return int64_to_float32_scalbn(a
, 0, status
);
3833 float32
int32_to_float32(int32_t a
, float_status
*status
)
3835 return int64_to_float32_scalbn(a
, 0, status
);
3838 float32
int16_to_float32(int16_t a
, float_status
*status
)
3840 return int64_to_float32_scalbn(a
, 0, status
);
3843 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3847 /* Without scaling, there are no overflow concerns. */
3848 if (likely(scale
== 0) && can_use_fpu(status
)) {
3854 parts_sint_to_float(&p
, a
, scale
, status
);
3855 return float64_round_pack_canonical(&p
, status
);
3858 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3860 return int64_to_float64_scalbn(a
, scale
, status
);
3863 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3865 return int64_to_float64_scalbn(a
, scale
, status
);
3868 float64
int64_to_float64(int64_t a
, float_status
*status
)
3870 return int64_to_float64_scalbn(a
, 0, status
);
3873 float64
int32_to_float64(int32_t a
, float_status
*status
)
3875 return int64_to_float64_scalbn(a
, 0, status
);
3878 float64
int16_to_float64(int16_t a
, float_status
*status
)
3880 return int64_to_float64_scalbn(a
, 0, status
);
3883 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3887 parts_sint_to_float(&p
, a
, scale
, status
);
3888 return bfloat16_round_pack_canonical(&p
, status
);
3891 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3893 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3896 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3898 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3901 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3903 return int64_to_bfloat16_scalbn(a
, 0, status
);
3906 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3908 return int64_to_bfloat16_scalbn(a
, 0, status
);
3911 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3913 return int64_to_bfloat16_scalbn(a
, 0, status
);
3916 float128
int128_to_float128(Int128 a
, float_status
*status
)
3918 FloatParts128 p
= { };
3922 p
.cls
= float_class_normal
;
3923 if (!int128_nonneg(a
)) {
3928 shift
= clz64(int128_gethi(a
));
3930 shift
+= clz64(int128_getlo(a
));
3933 p
.exp
= 127 - shift
;
3934 a
= int128_lshift(a
, shift
);
3936 p
.frac_hi
= int128_gethi(a
);
3937 p
.frac_lo
= int128_getlo(a
);
3939 p
.cls
= float_class_zero
;
3942 return float128_round_pack_canonical(&p
, status
);
3945 float128
int64_to_float128(int64_t a
, float_status
*status
)
3949 parts_sint_to_float(&p
, a
, 0, status
);
3950 return float128_round_pack_canonical(&p
, status
);
3953 float128
int32_to_float128(int32_t a
, float_status
*status
)
3955 return int64_to_float128(a
, status
);
3958 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
3962 parts_sint_to_float(&p
, a
, 0, status
);
3963 return floatx80_round_pack_canonical(&p
, status
);
3966 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
3968 return int64_to_floatx80(a
, status
);
3972 * Unsigned Integer to floating-point conversions
3975 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3979 parts_uint_to_float(&p
, a
, scale
, status
);
3980 return float16_round_pack_canonical(&p
, status
);
3983 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3985 return uint64_to_float16_scalbn(a
, scale
, status
);
3988 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3990 return uint64_to_float16_scalbn(a
, scale
, status
);
3993 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3995 return uint64_to_float16_scalbn(a
, 0, status
);
3998 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
4000 return uint64_to_float16_scalbn(a
, 0, status
);
4003 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
4005 return uint64_to_float16_scalbn(a
, 0, status
);
4008 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
4010 return uint64_to_float16_scalbn(a
, 0, status
);
4013 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
4017 /* Without scaling, there are no overflow concerns. */
4018 if (likely(scale
== 0) && can_use_fpu(status
)) {
4024 parts_uint_to_float(&p
, a
, scale
, status
);
4025 return float32_round_pack_canonical(&p
, status
);
4028 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
4030 return uint64_to_float32_scalbn(a
, scale
, status
);
4033 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
4035 return uint64_to_float32_scalbn(a
, scale
, status
);
4038 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
4040 return uint64_to_float32_scalbn(a
, 0, status
);
4043 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
4045 return uint64_to_float32_scalbn(a
, 0, status
);
4048 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
4050 return uint64_to_float32_scalbn(a
, 0, status
);
4053 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
4057 /* Without scaling, there are no overflow concerns. */
4058 if (likely(scale
== 0) && can_use_fpu(status
)) {
4064 parts_uint_to_float(&p
, a
, scale
, status
);
4065 return float64_round_pack_canonical(&p
, status
);
4068 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
4070 return uint64_to_float64_scalbn(a
, scale
, status
);
4073 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
4075 return uint64_to_float64_scalbn(a
, scale
, status
);
4078 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
4080 return uint64_to_float64_scalbn(a
, 0, status
);
4083 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
4085 return uint64_to_float64_scalbn(a
, 0, status
);
4088 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
4090 return uint64_to_float64_scalbn(a
, 0, status
);
4093 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
4097 parts_uint_to_float(&p
, a
, scale
, status
);
4098 return bfloat16_round_pack_canonical(&p
, status
);
4101 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
4103 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
4106 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
4108 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
4111 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
4113 return uint64_to_bfloat16_scalbn(a
, 0, status
);
4116 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
4118 return uint64_to_bfloat16_scalbn(a
, 0, status
);
4121 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
4123 return uint64_to_bfloat16_scalbn(a
, 0, status
);
4126 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
4130 parts_uint_to_float(&p
, a
, 0, status
);
4131 return float128_round_pack_canonical(&p
, status
);
4134 float128
uint128_to_float128(Int128 a
, float_status
*status
)
4136 FloatParts128 p
= { };
4140 p
.cls
= float_class_normal
;
4142 shift
= clz64(int128_gethi(a
));
4144 shift
+= clz64(int128_getlo(a
));
4147 p
.exp
= 127 - shift
;
4148 a
= int128_lshift(a
, shift
);
4150 p
.frac_hi
= int128_gethi(a
);
4151 p
.frac_lo
= int128_getlo(a
);
4153 p
.cls
= float_class_zero
;
4156 return float128_round_pack_canonical(&p
, status
);
/*
 * Minimum and maximum
 */

static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}

#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                                \
    MINMAX_1(type, maxnum, minmax_isnum)                                  \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
    MINMAX_1(type, maximum_number, minmax_isnumber)                       \
    MINMAX_1(type, min, minmax_ismin)                                     \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
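/*
 * Editor's note (illustrative, not part of the original source): MINMAX_2
 * stamps out the public min/max API for each type.  For instance,
 * MINMAX_1(float32, minnum, minmax_ismin | minmax_isnum) expands to
 *
 *     float32 float32_minnum(float32 a, float32 b, float_status *s)
 *     { return float32_minmax(a, b, s, minmax_ismin | minmax_isnum); }
 *
 * i.e. the IEEE minNum behaviour, where a number is preferred over a
 * quiet NaN operand.
 */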
4244 * Floating point compare
4247 static FloatRelation QEMU_FLATTEN
4248 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
4250 FloatParts64 pa
, pb
;
4252 float16_unpack_canonical(&pa
, a
, s
);
4253 float16_unpack_canonical(&pb
, b
, s
);
4254 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4257 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
4259 return float16_do_compare(a
, b
, s
, false);
4262 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
4264 return float16_do_compare(a
, b
, s
, true);
4267 static FloatRelation QEMU_SOFTFLOAT_ATTR
4268 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
4270 FloatParts64 pa
, pb
;
4272 float32_unpack_canonical(&pa
, a
, s
);
4273 float32_unpack_canonical(&pb
, b
, s
);
4274 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4277 static FloatRelation QEMU_FLATTEN
4278 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
4280 union_float32 ua
, ub
;
4285 if (QEMU_NO_HARDFLOAT
) {
4289 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
4290 if (isgreaterequal(ua
.h
, ub
.h
)) {
4291 if (isgreater(ua
.h
, ub
.h
)) {
4292 return float_relation_greater
;
4294 return float_relation_equal
;
4296 if (likely(isless(ua
.h
, ub
.h
))) {
4297 return float_relation_less
;
4300 * The only condition remaining is unordered.
4301 * Fall through to set flags.
4304 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4307 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
4309 return float32_hs_compare(a
, b
, s
, false);
4312 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
4314 return float32_hs_compare(a
, b
, s
, true);
4317 static FloatRelation QEMU_SOFTFLOAT_ATTR
4318 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
4320 FloatParts64 pa
, pb
;
4322 float64_unpack_canonical(&pa
, a
, s
);
4323 float64_unpack_canonical(&pb
, b
, s
);
4324 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4327 static FloatRelation QEMU_FLATTEN
4328 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
4330 union_float64 ua
, ub
;
4335 if (QEMU_NO_HARDFLOAT
) {
4339 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
4340 if (isgreaterequal(ua
.h
, ub
.h
)) {
4341 if (isgreater(ua
.h
, ub
.h
)) {
4342 return float_relation_greater
;
4344 return float_relation_equal
;
4346 if (likely(isless(ua
.h
, ub
.h
))) {
4347 return float_relation_less
;
4350 * The only condition remaining is unordered.
4351 * Fall through to set flags.
4354 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4357 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
4359 return float64_hs_compare(a
, b
, s
, false);
4362 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
4364 return float64_hs_compare(a
, b
, s
, true);
4367 static FloatRelation QEMU_FLATTEN
4368 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
4370 FloatParts64 pa
, pb
;
4372 bfloat16_unpack_canonical(&pa
, a
, s
);
4373 bfloat16_unpack_canonical(&pb
, b
, s
);
4374 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4377 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
4379 return bfloat16_do_compare(a
, b
, s
, false);
4382 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
4384 return bfloat16_do_compare(a
, b
, s
, true);
4387 static FloatRelation QEMU_FLATTEN
4388 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
4390 FloatParts128 pa
, pb
;
4392 float128_unpack_canonical(&pa
, a
, s
);
4393 float128_unpack_canonical(&pb
, b
, s
);
4394 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4397 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
4399 return float128_do_compare(a
, b
, s
, false);
4402 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
4404 return float128_do_compare(a
, b
, s
, true);
4407 static FloatRelation QEMU_FLATTEN
4408 floatx80_do_compare(floatx80 a
, floatx80 b
, float_status
*s
, bool is_quiet
)
4410 FloatParts128 pa
, pb
;
4412 if (!floatx80_unpack_canonical(&pa
, a
, s
) ||
4413 !floatx80_unpack_canonical(&pb
, b
, s
)) {
4414 return float_relation_unordered
;
4416 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4419 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*s
)
4421 return floatx80_do_compare(a
, b
, s
, false);
4424 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
, float_status
*s
)
4426 return floatx80_do_compare(a
, b
, s
, true);
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    parts_scalbn(&p, n, status);
    return floatx80_round_pack_canonical(&p, status);
}
4493 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
4497 float16_unpack_canonical(&p
, a
, status
);
4498 parts_sqrt(&p
, status
, &float16_params
);
4499 return float16_round_pack_canonical(&p
, status
);
4502 static float32 QEMU_SOFTFLOAT_ATTR
4503 soft_f32_sqrt(float32 a
, float_status
*status
)
4507 float32_unpack_canonical(&p
, a
, status
);
4508 parts_sqrt(&p
, status
, &float32_params
);
4509 return float32_round_pack_canonical(&p
, status
);
4512 static float64 QEMU_SOFTFLOAT_ATTR
4513 soft_f64_sqrt(float64 a
, float_status
*status
)
4517 float64_unpack_canonical(&p
, a
, status
);
4518 parts_sqrt(&p
, status
, &float64_params
);
4519 return float64_round_pack_canonical(&p
, status
);
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}

float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
float64 float64r32_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64r32_round_pack_canonical(&p, status);
}

bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}
floatx80 floatx80_sqrt(floatx80 a, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        return floatx80_default_nan(s);
    }
    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, s);
}
float32 float32_log2(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_log2(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    if (p.exp == 0 && p.frac != 0) {
        float_raise(float_flag_input_denormal, status);
        return true;
    }

    return false;
}
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;
}
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
|     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
| the result is rounded to the same number of bits as single or double
| precision, respectively.  Otherwise, the result is rounded to the full
| precision of the extended double-precision format.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
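/*
 * Illustration of the rounding constants used below: for
 * floatx80_precision_d, roundMask = 0x7FF discards the low 11 bits of the
 * 64-bit significand, keeping 53 bits as for double precision, and
 * roundIncrement = 0x400 is half the weight of the lowest kept bit
 * (round to nearest).  For floatx80_precision_s, roundMask =
 * 0xFFFFFFFFFF keeps 24 bits and roundIncrement = 0x8000000000 is
 * likewise half the weight of the lowest kept bit.
 */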
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        } else {
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    } else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );
}
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/
floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
                                       bool zSign, int32_t zExp,
                                       uint64_t zSig0, uint64_t zSig1,
                                       float_status *status)
{
    int8_t shiftCount;

    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
                                zSig0, zSig1, status);
}
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'.  The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
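/*
 * These constants are the reciprocal factorials 1/n! of the series above,
 * encoded as IEEE double bit patterns: 0x3ff0000000000000 is 1.0,
 * 0x3fe0000000000000 is 0.5 (1/2!), 0x3fc5555555555555 is the nearest
 * double to 1/6 (1/3!), and so on down to 1/15!.
 */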
float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 xp, xnp, tp, rp;
    int i;

    float32_unpack_canonical(&xp, a, status);
    if (unlikely(xp.cls != float_class_normal)) {
        switch (xp.cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(&xp, status);
            return float32_round_pack_canonical(&xp, status);
        case float_class_inf:
            return xp.sign ? float32_zero : a;
        case float_class_zero:
            return float32_one;
        default:
            break;
        }
        g_assert_not_reached();
    }

    float_raise(float_flag_inexact, status);

    float64_unpack_canonical(&tp, float64_ln2, status);
    xp = *parts_mul(&xp, &tp, status);
    xnp = xp;

    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

    return float32_round_pack_canonical(&rp, status);
}
/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the
| result as an extended double-precision floating-point value.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    return floatx80_round_pack_canonical(&p, status);
}
static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;