4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
215 # define QEMU_HARDFLOAT_USE_ISINF 0
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
405 * Classify a floating point number. Everything above float_class_qnan
406 * is a NaN so cls >= float_class_qnan is any NaN.
/* NOTE(review): missing enumerators reconstructed; ordering matters —
 * is_nan() below relies on all NaN classes sorting >= float_class_qnan. */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit) (1u << (bit))

/* Bit-mask forms of the classes, for testing several classes at once. */
enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
431 /* Flags for parts_minmax. */
433 /* Set for minimum; clear for maximum. */
435 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
437 /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
440 * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
446 /* Simple helpers for checking if, or what kind of, NaN we have */
447 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
449 return unlikely(c
>= float_class_qnan
);
452 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
454 return c
== float_class_snan
;
457 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
459 return c
== float_class_qnan
;
463 * Structure holding all of the decomposed parts of a float.
464 * The exponent is unbiased and the fraction is normalized.
466 * The fraction words are stored in big-endian word ordering,
467 * so that truncation from a larger format to a smaller format
468 * can be done simply by ignoring subsequent elements.
476 /* Routines that know the structure may reference the singular name. */
479 * Routines expanded with multiple structures reference "hi" and "lo"
480 * depending on the operation. In FloatParts64, "hi" and "lo" are
481 * both the same word and aliased here.
501 uint64_t frac_hm
; /* high-middle */
502 uint64_t frac_lm
; /* low-middle */
506 /* These apply to the most significant word of each FloatPartsN. */
507 #define DECOMPOSED_BINARY_POINT 63
508 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
510 /* Structure holding all of the relevant parameters for a format.
511 * exp_size: the size of the exponent field
512 * exp_bias: the offset applied to the exponent field
513 * exp_max: the maximum normalised exponent
514 * frac_size: the size of the fraction field
515 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516 * The following are computed based the size of fraction
517 * round_mask: bits below lsb which must be rounded
518 * The following optional modifiers are available:
519 * arm_althp: handle ARM Alternative Half Precision
531 /* Expand fields based on the size of exponent and fraction */
532 #define FLOAT_PARAMS_(E) \
534 .exp_bias = ((1 << E) - 1) >> 1, \
535 .exp_max = (1 << E) - 1
537 #define FLOAT_PARAMS(E, F) \
540 .frac_shift = (-F - 1) & 63, \
541 .round_mask = (1ull << ((-F - 1) & 63)) - 1
543 static const FloatFmt float16_params
= {
547 static const FloatFmt float16_params_ahp
= {
552 static const FloatFmt bfloat16_params
= {
556 static const FloatFmt float32_params
= {
560 static const FloatFmt float64_params
= {
564 static const FloatFmt float128_params
= {
565 FLOAT_PARAMS(15, 112)
568 #define FLOATX80_PARAMS(R) \
570 .frac_size = R == 64 ? 63 : R, \
572 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
574 static const FloatFmt floatx80_params
[3] = {
575 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
576 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
577 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
580 /* Unpack a float to parts, but do not canonicalize. */
581 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
583 const int f_size
= fmt
->frac_size
;
584 const int e_size
= fmt
->exp_size
;
586 *r
= (FloatParts64
) {
587 .cls
= float_class_unclassified
,
588 .sign
= extract64(raw
, f_size
+ e_size
, 1),
589 .exp
= extract64(raw
, f_size
, e_size
),
590 .frac
= extract64(raw
, 0, f_size
)
594 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
596 unpack_raw64(p
, &float16_params
, f
);
599 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
601 unpack_raw64(p
, &bfloat16_params
, f
);
604 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
606 unpack_raw64(p
, &float32_params
, f
);
609 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
611 unpack_raw64(p
, &float64_params
, f
);
614 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
616 *p
= (FloatParts128
) {
617 .cls
= float_class_unclassified
,
618 .sign
= extract32(f
.high
, 15, 1),
619 .exp
= extract32(f
.high
, 0, 15),
624 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
626 const int f_size
= float128_params
.frac_size
- 64;
627 const int e_size
= float128_params
.exp_size
;
629 *p
= (FloatParts128
) {
630 .cls
= float_class_unclassified
,
631 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
632 .exp
= extract64(f
.high
, f_size
, e_size
),
633 .frac_hi
= extract64(f
.high
, 0, f_size
),
638 /* Pack a float from parts, but do not canonicalize. */
639 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
641 const int f_size
= fmt
->frac_size
;
642 const int e_size
= fmt
->exp_size
;
645 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
646 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
647 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
651 static inline float16
float16_pack_raw(const FloatParts64
*p
)
653 return make_float16(pack_raw64(p
, &float16_params
));
656 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
658 return pack_raw64(p
, &bfloat16_params
);
661 static inline float32
float32_pack_raw(const FloatParts64
*p
)
663 return make_float32(pack_raw64(p
, &float32_params
));
666 static inline float64
float64_pack_raw(const FloatParts64
*p
)
668 return make_float64(pack_raw64(p
, &float64_params
));
671 static float128
float128_pack_raw(const FloatParts128
*p
)
673 const int f_size
= float128_params
.frac_size
- 64;
674 const int e_size
= float128_params
.exp_size
;
677 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
678 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
679 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
680 return make_float128(hi
, p
->frac_lo
);
683 /*----------------------------------------------------------------------------
684 | Functions and definitions to determine: (1) whether tininess for underflow
685 | is detected before or after rounding by default, (2) what (if anything)
686 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
687 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
688 | are propagated from function inputs to output. These details are target-
690 *----------------------------------------------------------------------------*/
691 #include "softfloat-specialize.c.inc"
693 #define PARTS_GENERIC_64_128(NAME, P) \
694 _Generic((P), FloatParts64 *: parts64_##NAME, \
695 FloatParts128 *: parts128_##NAME)
697 #define PARTS_GENERIC_64_128_256(NAME, P) \
698 _Generic((P), FloatParts64 *: parts64_##NAME, \
699 FloatParts128 *: parts128_##NAME, \
700 FloatParts256 *: parts256_##NAME)
702 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
703 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
705 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
706 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
708 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
710 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
712 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
715 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
717 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
718 FloatParts64
*c
, float_status
*s
,
719 int ab_mask
, int abc_mask
);
720 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
724 int ab_mask
, int abc_mask
);
726 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
727 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
729 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
730 const FloatFmt
*fmt
);
731 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
732 const FloatFmt
*fmt
);
734 #define parts_canonicalize(A, S, F) \
735 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
737 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
738 const FloatFmt
*fmt
);
739 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
740 const FloatFmt
*fmt
);
742 #define parts_uncanon_normal(A, S, F) \
743 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
745 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
746 const FloatFmt
*fmt
);
747 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
748 const FloatFmt
*fmt
);
750 #define parts_uncanon(A, S, F) \
751 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
753 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
754 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
755 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
757 #define parts_add_normal(A, B) \
758 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
760 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
761 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
762 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
764 #define parts_sub_normal(A, B) \
765 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
767 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
768 float_status
*s
, bool subtract
);
769 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
770 float_status
*s
, bool subtract
);
772 #define parts_addsub(A, B, S, Z) \
773 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
775 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
777 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
780 #define parts_mul(A, B, S) \
781 PARTS_GENERIC_64_128(mul, A)(A, B, S)
783 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
784 FloatParts64
*c
, int flags
,
786 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
787 FloatParts128
*c
, int flags
,
790 #define parts_muladd(A, B, C, Z, S) \
791 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
793 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
795 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
798 #define parts_div(A, B, S) \
799 PARTS_GENERIC_64_128(div, A)(A, B, S)
801 static FloatParts64
*parts64_modrem(FloatParts64
*a
, FloatParts64
*b
,
802 uint64_t *mod_quot
, float_status
*s
);
803 static FloatParts128
*parts128_modrem(FloatParts128
*a
, FloatParts128
*b
,
804 uint64_t *mod_quot
, float_status
*s
);
806 #define parts_modrem(A, B, Q, S) \
807 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
809 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
810 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
812 #define parts_sqrt(A, S, F) \
813 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
815 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
816 int scale
, int frac_size
);
817 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
818 int scale
, int frac_size
);
820 #define parts_round_to_int_normal(A, R, C, F) \
821 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
823 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
824 int scale
, float_status
*s
,
825 const FloatFmt
*fmt
);
826 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
827 int scale
, float_status
*s
,
828 const FloatFmt
*fmt
);
830 #define parts_round_to_int(A, R, C, S, F) \
831 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
833 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
834 int scale
, int64_t min
, int64_t max
,
836 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
837 int scale
, int64_t min
, int64_t max
,
840 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
841 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
843 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
844 int scale
, uint64_t max
,
846 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
847 int scale
, uint64_t max
,
850 #define parts_float_to_uint(P, R, Z, M, S) \
851 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
853 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
854 int scale
, float_status
*s
);
855 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
856 int scale
, float_status
*s
);
858 #define parts_sint_to_float(P, I, Z, S) \
859 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
861 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
862 int scale
, float_status
*s
);
863 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
864 int scale
, float_status
*s
);
866 #define parts_uint_to_float(P, I, Z, S) \
867 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
869 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
870 float_status
*s
, int flags
);
871 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
872 float_status
*s
, int flags
);
874 #define parts_minmax(A, B, S, F) \
875 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
877 static FloatRelation
parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
878 float_status
*s
, bool q
);
879 static FloatRelation
parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
880 float_status
*s
, bool q
);
882 #define parts_compare(A, B, S, Q) \
883 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
885 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
886 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
888 #define parts_scalbn(A, N, S) \
889 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
891 static void parts64_log2(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
892 static void parts128_log2(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
894 #define parts_log2(A, S, F) \
895 PARTS_GENERIC_64_128(log2, A)(A, S, F)
898 * Helper functions for softfloat-parts.c.inc, per-size operations.
901 #define FRAC_GENERIC_64_128(NAME, P) \
902 _Generic((P), FloatParts64 *: frac64_##NAME, \
903 FloatParts128 *: frac128_##NAME)
905 #define FRAC_GENERIC_64_128_256(NAME, P) \
906 _Generic((P), FloatParts64 *: frac64_##NAME, \
907 FloatParts128 *: frac128_##NAME, \
908 FloatParts256 *: frac256_##NAME)
910 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
912 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
915 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
918 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
919 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
923 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
926 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
927 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
928 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
929 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
933 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
935 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
937 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
940 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
942 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
943 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
946 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
948 static void frac64_allones(FloatParts64
*a
)
953 static void frac128_allones(FloatParts128
*a
)
955 a
->frac_hi
= a
->frac_lo
= -1;
958 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
960 static FloatRelation
frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
962 return (a
->frac
== b
->frac
? float_relation_equal
963 : a
->frac
< b
->frac
? float_relation_less
964 : float_relation_greater
);
967 static FloatRelation
frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
969 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
971 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
973 return float_relation_equal
;
976 return ta
< tb
? float_relation_less
: float_relation_greater
;
979 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
981 static void frac64_clear(FloatParts64
*a
)
986 static void frac128_clear(FloatParts128
*a
)
988 a
->frac_hi
= a
->frac_lo
= 0;
991 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
993 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
995 uint64_t n1
, n0
, r
, q
;
999 * We want a 2*N / N-bit division to produce exactly an N-bit
1000 * result, so that we do not lose any precision and so that we
1001 * do not have to renormalize afterward. If A.frac < B.frac,
1002 * then division would produce an (N-1)-bit result; shift A left
1003 * by one to produce the an N-bit result, and return true to
1004 * decrement the exponent to match.
1006 * The udiv_qrnnd algorithm that we're using requires normalization,
1007 * i.e. the msb of the denominator must be set, which is already true.
1009 ret
= a
->frac
< b
->frac
;
1017 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1019 /* Set lsb if there is a remainder, to set inexact. */
1020 a
->frac
= q
| (r
!= 0);
1025 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1027 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1028 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1031 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1032 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1034 ret
= lt128(a0
, a1
, b0
, b1
);
1036 a1
= shr_double(a0
, a1
, 1);
1040 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1041 q0
= estimateDiv128To64(a0
, a1
, b0
);
1044 * Estimate is high because B1 was not included (unless B1 == 0).
1045 * Reduce quotient and increase remainder until remainder is non-negative.
1046 * This loop will execute 0 to 2 times.
1048 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1049 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1052 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1055 /* Repeat using the remainder, producing a second word of quotient. */
1056 q1
= estimateDiv128To64(r1
, r2
, b0
);
1057 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1058 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1061 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1064 /* Any remainder indicates inexact; set sticky bit. */
1065 q1
|= (r2
| r3
) != 0;
1072 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1074 static bool frac64_eqz(FloatParts64
*a
)
1076 return a
->frac
== 0;
1079 static bool frac128_eqz(FloatParts128
*a
)
1081 return (a
->frac_hi
| a
->frac_lo
) == 0;
1084 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1086 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1088 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1091 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1093 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1094 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1097 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1099 static void frac64_neg(FloatParts64
*a
)
1104 static void frac128_neg(FloatParts128
*a
)
1107 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1108 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1111 static void frac256_neg(FloatParts256
*a
)
1114 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1115 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1116 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1117 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1120 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1122 static int frac64_normalize(FloatParts64
*a
)
1125 int shift
= clz64(a
->frac
);
1132 static int frac128_normalize(FloatParts128
*a
)
1135 int shl
= clz64(a
->frac_hi
);
1136 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1139 } else if (a
->frac_lo
) {
1140 int shl
= clz64(a
->frac_lo
);
1141 a
->frac_hi
= a
->frac_lo
<< shl
;
1148 static int frac256_normalize(FloatParts256
*a
)
1150 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1151 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1163 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1166 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1169 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1172 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1182 a0
= shl_double(a0
, a1
, shl
);
1183 a1
= shl_double(a1
, a2
, shl
);
1184 a2
= shl_double(a2
, a3
, shl
);
1195 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1197 static void frac64_modrem(FloatParts64
*a
, FloatParts64
*b
, uint64_t *mod_quot
)
1199 uint64_t a0
, a1
, b0
, t0
, t1
, q
, quot
;
1200 int exp_diff
= a
->exp
- b
->exp
;
1206 if (exp_diff
< -1) {
1212 if (exp_diff
== -1) {
1218 quot
= q
= b0
<= a0
;
1224 while (exp_diff
> 0) {
1225 q
= estimateDiv128To64(a0
, a1
, b0
);
1226 q
= q
> 2 ? q
- 2 : 0;
1227 mul64To128(b0
, q
, &t0
, &t1
);
1228 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1229 shortShift128Left(a0
, a1
, 62, &a0
, &a1
);
1231 quot
= (quot
<< 62) + q
;
1236 q
= estimateDiv128To64(a0
, a1
, b0
);
1237 q
= q
> 2 ? (q
- 2) >> (64 - exp_diff
) : 0;
1238 mul64To128(b0
, q
<< (64 - exp_diff
), &t0
, &t1
);
1239 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1240 shortShift128Left(0, b0
, 64 - exp_diff
, &t0
, &t1
);
1241 while (le128(t0
, t1
, a0
, a1
)) {
1243 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1245 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1254 sub128(t0
, t1
, a0
, a1
, &t0
, &t1
);
1255 if (lt128(t0
, t1
, a0
, a1
) ||
1256 (eq128(t0
, t1
, a0
, a1
) && (q
& 1))) {
1265 shortShift128Left(a0
, a1
, shift
, &a0
, &a1
);
1266 } else if (likely(a1
)) {
1272 a
->cls
= float_class_zero
;
1276 a
->exp
= b
->exp
+ exp_diff
- shift
;
1277 a
->frac
= a0
| (a1
!= 0);
1280 static void frac128_modrem(FloatParts128
*a
, FloatParts128
*b
,
1283 uint64_t a0
, a1
, a2
, b0
, b1
, t0
, t1
, t2
, q
, quot
;
1284 int exp_diff
= a
->exp
- b
->exp
;
1291 if (exp_diff
< -1) {
1297 if (exp_diff
== -1) {
1298 shift128Right(a0
, a1
, 1, &a0
, &a1
);
1305 quot
= q
= le128(b0
, b1
, a0
, a1
);
1307 sub128(a0
, a1
, b0
, b1
, &a0
, &a1
);
1311 while (exp_diff
> 0) {
1312 q
= estimateDiv128To64(a0
, a1
, b0
);
1313 q
= q
> 4 ? q
- 4 : 0;
1314 mul128By64To192(b0
, b1
, q
, &t0
, &t1
, &t2
);
1315 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1316 shortShift192Left(a0
, a1
, a2
, 61, &a0
, &a1
, &a2
);
1318 quot
= (quot
<< 61) + q
;
1323 q
= estimateDiv128To64(a0
, a1
, b0
);
1324 q
= q
> 4 ? (q
- 4) >> (64 - exp_diff
) : 0;
1325 mul128By64To192(b0
, b1
, q
<< (64 - exp_diff
), &t0
, &t1
, &t2
);
1326 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1327 shortShift192Left(0, b0
, b1
, 64 - exp_diff
, &t0
, &t1
, &t2
);
1328 while (le192(t0
, t1
, t2
, a0
, a1
, a2
)) {
1330 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1332 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1342 sub192(t0
, t1
, t2
, a0
, a1
, a2
, &t0
, &t1
, &t2
);
1343 if (lt192(t0
, t1
, t2
, a0
, a1
, a2
) ||
1344 (eq192(t0
, t1
, t2
, a0
, a1
, a2
) && (q
& 1))) {
1354 shortShift192Left(a0
, a1
, a2
, shift
, &a0
, &a1
, &a2
);
1355 } else if (likely(a1
)) {
1357 shortShift128Left(a1
, a2
, shift
, &a0
, &a1
);
1360 } else if (likely(a2
)) {
1366 a
->cls
= float_class_zero
;
1370 a
->exp
= b
->exp
+ exp_diff
- shift
;
1372 a
->frac_lo
= a1
| (a2
!= 0);
1375 #define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1377 static void frac64_shl(FloatParts64
*a
, int c
)
1382 static void frac128_shl(FloatParts128
*a
, int c
)
1384 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1392 a0
= shl_double(a0
, a1
, c
);
1400 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1402 static void frac64_shr(FloatParts64
*a
, int c
)
1407 static void frac128_shr(FloatParts128
*a
, int c
)
1409 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1417 a1
= shr_double(a0
, a1
, c
);
1425 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1427 static void frac64_shrjam(FloatParts64
*a
, int c
)
1429 uint64_t a0
= a
->frac
;
1431 if (likely(c
!= 0)) {
1432 if (likely(c
< 64)) {
1433 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1441 static void frac128_shrjam(FloatParts128
*a
, int c
)
1443 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1444 uint64_t sticky
= 0;
1446 if (unlikely(c
== 0)) {
1448 } else if (likely(c
< 64)) {
1450 } else if (likely(c
< 128)) {
1464 sticky
|= shr_double(a1
, 0, c
);
1465 a1
= shr_double(a0
, a1
, c
);
1469 a
->frac_lo
= a1
| (sticky
!= 0);
1473 static void frac256_shrjam(FloatParts256
*a
, int c
)
1475 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1476 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1477 uint64_t sticky
= 0;
1479 if (unlikely(c
== 0)) {
1481 } else if (likely(c
< 64)) {
1483 } else if (likely(c
< 256)) {
1484 if (unlikely(c
& 128)) {
1486 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1488 if (unlikely(c
& 64)) {
1490 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1497 sticky
= a0
| a1
| a2
| a3
;
1498 a0
= a1
= a2
= a3
= 0;
1502 sticky
|= shr_double(a3
, 0, c
);
1503 a3
= shr_double(a2
, a3
, c
);
1504 a2
= shr_double(a1
, a2
, c
);
1505 a1
= shr_double(a0
, a1
, c
);
1509 a
->frac_lo
= a3
| (sticky
!= 0);
1515 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1517 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1519 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1522 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1525 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1526 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1530 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1533 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1534 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1535 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1536 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1540 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1542 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1544 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1547 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1549 r
->frac_hi
= a
->frac_hi
;
1550 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1553 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1555 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1557 r
->frac_hi
= a
->frac
;
1561 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1563 r
->frac_hi
= a
->frac_hi
;
1564 r
->frac_hm
= a
->frac_lo
;
1569 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1572 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
1573 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1574 * and thus MIT licenced.
1576 static const uint16_t rsqrt_tab
[128] = {
1577 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1578 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1579 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1580 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1581 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1582 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1583 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1584 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1585 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1586 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1587 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1588 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1589 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1590 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1591 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1592 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1595 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1596 #define FloatPartsN glue(FloatParts,N)
1597 #define FloatPartsW glue(FloatParts,W)
1602 #include "softfloat-parts-addsub.c.inc"
1603 #include "softfloat-parts.c.inc"
1610 #include "softfloat-parts-addsub.c.inc"
1611 #include "softfloat-parts.c.inc"
1617 #include "softfloat-parts-addsub.c.inc"
1626 * Pack/unpack routines with a specific FloatFmt.
1629 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1630 float_status
*s
, const FloatFmt
*params
)
1632 float16_unpack_raw(p
, f
);
1633 parts_canonicalize(p
, s
, params
);
1636 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1639 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1642 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1645 bfloat16_unpack_raw(p
, f
);
1646 parts_canonicalize(p
, s
, &bfloat16_params
);
1649 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1651 const FloatFmt
*params
)
1653 parts_uncanon(p
, s
, params
);
1654 return float16_pack_raw(p
);
1657 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1660 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1663 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1666 parts_uncanon(p
, s
, &bfloat16_params
);
1667 return bfloat16_pack_raw(p
);
1670 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1673 float32_unpack_raw(p
, f
);
1674 parts_canonicalize(p
, s
, &float32_params
);
1677 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1680 parts_uncanon(p
, s
, &float32_params
);
1681 return float32_pack_raw(p
);
1684 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1687 float64_unpack_raw(p
, f
);
1688 parts_canonicalize(p
, s
, &float64_params
);
1691 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1694 parts_uncanon(p
, s
, &float64_params
);
1695 return float64_pack_raw(p
);
1698 static float64
float64r32_round_pack_canonical(FloatParts64
*p
,
1701 parts_uncanon(p
, s
, &float32_params
);
1704 * In parts_uncanon, we placed the fraction for float32 at the lsb.
1705 * We need to adjust the fraction higher so that the least N bits are
1706 * zero, and the fraction is adjacent to the float64 implicit bit.
1709 case float_class_normal
:
1710 if (unlikely(p
->exp
== 0)) {
1712 * The result is denormal for float32, but can be represented
1713 * in normalized form for float64. Adjust, per canonicalize.
1715 int shift
= frac_normalize(p
);
1716 p
->exp
= (float32_params
.frac_shift
-
1717 float32_params
.exp_bias
- shift
+ 1 +
1718 float64_params
.exp_bias
);
1719 frac_shr(p
, float64_params
.frac_shift
);
1721 frac_shl(p
, float32_params
.frac_shift
- float64_params
.frac_shift
);
1722 p
->exp
+= float64_params
.exp_bias
- float32_params
.exp_bias
;
1725 case float_class_snan
:
1726 case float_class_qnan
:
1727 frac_shl(p
, float32_params
.frac_shift
- float64_params
.frac_shift
);
1728 p
->exp
= float64_params
.exp_max
;
1730 case float_class_inf
:
1731 p
->exp
= float64_params
.exp_max
;
1733 case float_class_zero
:
1736 g_assert_not_reached();
1739 return float64_pack_raw(p
);
1742 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1745 float128_unpack_raw(p
, f
);
1746 parts_canonicalize(p
, s
, &float128_params
);
1749 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1752 parts_uncanon(p
, s
, &float128_params
);
1753 return float128_pack_raw(p
);
1756 /* Returns false if the encoding is invalid. */
1757 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1760 /* Ensure rounding precision is set before beginning. */
1761 switch (s
->floatx80_rounding_precision
) {
1762 case floatx80_precision_x
:
1763 case floatx80_precision_d
:
1764 case floatx80_precision_s
:
1767 g_assert_not_reached();
1770 if (unlikely(floatx80_invalid_encoding(f
))) {
1771 float_raise(float_flag_invalid
, s
);
1775 floatx80_unpack_raw(p
, f
);
1777 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1778 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1780 /* The explicit integer bit is ignored, after invalid checks. */
1781 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1782 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1783 : parts_is_snan_frac(p
->frac_hi
, s
)
1784 ? float_class_snan
: float_class_qnan
);
1789 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1792 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1797 case float_class_normal
:
1798 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1799 parts_uncanon_normal(p
, s
, fmt
);
1807 frac_truncjam(&p64
, p
);
1808 parts_uncanon_normal(&p64
, s
, fmt
);
1812 if (exp
!= fmt
->exp_max
) {
1815 /* rounded to inf -- fall through to set frac correctly */
1817 case float_class_inf
:
1818 /* x86 and m68k differ in the setting of the integer bit. */
1819 frac
= floatx80_infinity_low
;
1823 case float_class_zero
:
1828 case float_class_snan
:
1829 case float_class_qnan
:
1830 /* NaNs have the integer bit set. */
1831 frac
= p
->frac_hi
| (1ull << 63);
1836 g_assert_not_reached();
1839 return packFloatx80(p
->sign
, exp
, frac
);
1843 * Addition and subtraction
1846 static float16 QEMU_FLATTEN
1847 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1849 FloatParts64 pa
, pb
, *pr
;
1851 float16_unpack_canonical(&pa
, a
, status
);
1852 float16_unpack_canonical(&pb
, b
, status
);
1853 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1855 return float16_round_pack_canonical(pr
, status
);
1858 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1860 return float16_addsub(a
, b
, status
, false);
1863 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1865 return float16_addsub(a
, b
, status
, true);
1868 static float32 QEMU_SOFTFLOAT_ATTR
1869 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1871 FloatParts64 pa
, pb
, *pr
;
1873 float32_unpack_canonical(&pa
, a
, status
);
1874 float32_unpack_canonical(&pb
, b
, status
);
1875 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1877 return float32_round_pack_canonical(pr
, status
);
1880 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1882 return soft_f32_addsub(a
, b
, status
, false);
1885 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1887 return soft_f32_addsub(a
, b
, status
, true);
1890 static float64 QEMU_SOFTFLOAT_ATTR
1891 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1893 FloatParts64 pa
, pb
, *pr
;
1895 float64_unpack_canonical(&pa
, a
, status
);
1896 float64_unpack_canonical(&pb
, b
, status
);
1897 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1899 return float64_round_pack_canonical(pr
, status
);
1902 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1904 return soft_f64_addsub(a
, b
, status
, false);
1907 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1909 return soft_f64_addsub(a
, b
, status
, true);
1912 static float hard_f32_add(float a
, float b
)
1917 static float hard_f32_sub(float a
, float b
)
1922 static double hard_f64_add(double a
, double b
)
1927 static double hard_f64_sub(double a
, double b
)
1932 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1934 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1935 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1937 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1940 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1942 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1943 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1945 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1949 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1950 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1952 return float32_gen2(a
, b
, s
, hard
, soft
,
1953 f32_is_zon2
, f32_addsubmul_post
);
1956 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1957 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1959 return float64_gen2(a
, b
, s
, hard
, soft
,
1960 f64_is_zon2
, f64_addsubmul_post
);
1963 float32 QEMU_FLATTEN
1964 float32_add(float32 a
, float32 b
, float_status
*s
)
1966 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1969 float32 QEMU_FLATTEN
1970 float32_sub(float32 a
, float32 b
, float_status
*s
)
1972 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1975 float64 QEMU_FLATTEN
1976 float64_add(float64 a
, float64 b
, float_status
*s
)
1978 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1981 float64 QEMU_FLATTEN
1982 float64_sub(float64 a
, float64 b
, float_status
*s
)
1984 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1987 static float64
float64r32_addsub(float64 a
, float64 b
, float_status
*status
,
1990 FloatParts64 pa
, pb
, *pr
;
1992 float64_unpack_canonical(&pa
, a
, status
);
1993 float64_unpack_canonical(&pb
, b
, status
);
1994 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1996 return float64r32_round_pack_canonical(pr
, status
);
1999 float64
float64r32_add(float64 a
, float64 b
, float_status
*status
)
2001 return float64r32_addsub(a
, b
, status
, false);
2004 float64
float64r32_sub(float64 a
, float64 b
, float_status
*status
)
2006 return float64r32_addsub(a
, b
, status
, true);
2009 static bfloat16 QEMU_FLATTEN
2010 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
2012 FloatParts64 pa
, pb
, *pr
;
2014 bfloat16_unpack_canonical(&pa
, a
, status
);
2015 bfloat16_unpack_canonical(&pb
, b
, status
);
2016 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2018 return bfloat16_round_pack_canonical(pr
, status
);
2021 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
2023 return bfloat16_addsub(a
, b
, status
, false);
2026 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
2028 return bfloat16_addsub(a
, b
, status
, true);
2031 static float128 QEMU_FLATTEN
2032 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
2034 FloatParts128 pa
, pb
, *pr
;
2036 float128_unpack_canonical(&pa
, a
, status
);
2037 float128_unpack_canonical(&pb
, b
, status
);
2038 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2040 return float128_round_pack_canonical(pr
, status
);
2043 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
2045 return float128_addsub(a
, b
, status
, false);
2048 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
2050 return float128_addsub(a
, b
, status
, true);
2053 static floatx80 QEMU_FLATTEN
2054 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
2056 FloatParts128 pa
, pb
, *pr
;
2058 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2059 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2060 return floatx80_default_nan(status
);
2063 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2064 return floatx80_round_pack_canonical(pr
, status
);
2067 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
2069 return floatx80_addsub(a
, b
, status
, false);
2072 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
2074 return floatx80_addsub(a
, b
, status
, true);
2081 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
2083 FloatParts64 pa
, pb
, *pr
;
2085 float16_unpack_canonical(&pa
, a
, status
);
2086 float16_unpack_canonical(&pb
, b
, status
);
2087 pr
= parts_mul(&pa
, &pb
, status
);
2089 return float16_round_pack_canonical(pr
, status
);
2092 static float32 QEMU_SOFTFLOAT_ATTR
2093 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
2095 FloatParts64 pa
, pb
, *pr
;
2097 float32_unpack_canonical(&pa
, a
, status
);
2098 float32_unpack_canonical(&pb
, b
, status
);
2099 pr
= parts_mul(&pa
, &pb
, status
);
2101 return float32_round_pack_canonical(pr
, status
);
2104 static float64 QEMU_SOFTFLOAT_ATTR
2105 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
2107 FloatParts64 pa
, pb
, *pr
;
2109 float64_unpack_canonical(&pa
, a
, status
);
2110 float64_unpack_canonical(&pb
, b
, status
);
2111 pr
= parts_mul(&pa
, &pb
, status
);
2113 return float64_round_pack_canonical(pr
, status
);
2116 static float hard_f32_mul(float a
, float b
)
2121 static double hard_f64_mul(double a
, double b
)
2126 float32 QEMU_FLATTEN
2127 float32_mul(float32 a
, float32 b
, float_status
*s
)
2129 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
2130 f32_is_zon2
, f32_addsubmul_post
);
2133 float64 QEMU_FLATTEN
2134 float64_mul(float64 a
, float64 b
, float_status
*s
)
2136 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
2137 f64_is_zon2
, f64_addsubmul_post
);
2140 float64
float64r32_mul(float64 a
, float64 b
, float_status
*status
)
2142 FloatParts64 pa
, pb
, *pr
;
2144 float64_unpack_canonical(&pa
, a
, status
);
2145 float64_unpack_canonical(&pb
, b
, status
);
2146 pr
= parts_mul(&pa
, &pb
, status
);
2148 return float64r32_round_pack_canonical(pr
, status
);
2151 bfloat16 QEMU_FLATTEN
2152 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
2154 FloatParts64 pa
, pb
, *pr
;
2156 bfloat16_unpack_canonical(&pa
, a
, status
);
2157 bfloat16_unpack_canonical(&pb
, b
, status
);
2158 pr
= parts_mul(&pa
, &pb
, status
);
2160 return bfloat16_round_pack_canonical(pr
, status
);
2163 float128 QEMU_FLATTEN
2164 float128_mul(float128 a
, float128 b
, float_status
*status
)
2166 FloatParts128 pa
, pb
, *pr
;
2168 float128_unpack_canonical(&pa
, a
, status
);
2169 float128_unpack_canonical(&pb
, b
, status
);
2170 pr
= parts_mul(&pa
, &pb
, status
);
2172 return float128_round_pack_canonical(pr
, status
);
2175 floatx80 QEMU_FLATTEN
2176 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
2178 FloatParts128 pa
, pb
, *pr
;
2180 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2181 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2182 return floatx80_default_nan(status
);
2185 pr
= parts_mul(&pa
, &pb
, status
);
2186 return floatx80_round_pack_canonical(pr
, status
);
2190 * Fused multiply-add
2193 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
2194 int flags
, float_status
*status
)
2196 FloatParts64 pa
, pb
, pc
, *pr
;
2198 float16_unpack_canonical(&pa
, a
, status
);
2199 float16_unpack_canonical(&pb
, b
, status
);
2200 float16_unpack_canonical(&pc
, c
, status
);
2201 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2203 return float16_round_pack_canonical(pr
, status
);
2206 static float32 QEMU_SOFTFLOAT_ATTR
2207 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
2208 float_status
*status
)
2210 FloatParts64 pa
, pb
, pc
, *pr
;
2212 float32_unpack_canonical(&pa
, a
, status
);
2213 float32_unpack_canonical(&pb
, b
, status
);
2214 float32_unpack_canonical(&pc
, c
, status
);
2215 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2217 return float32_round_pack_canonical(pr
, status
);
2220 static float64 QEMU_SOFTFLOAT_ATTR
2221 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
2222 float_status
*status
)
2224 FloatParts64 pa
, pb
, pc
, *pr
;
2226 float64_unpack_canonical(&pa
, a
, status
);
2227 float64_unpack_canonical(&pb
, b
, status
);
2228 float64_unpack_canonical(&pc
, c
, status
);
2229 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2231 return float64_round_pack_canonical(pr
, status
);
2234 static bool force_soft_fma
;
2236 float32 QEMU_FLATTEN
2237 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2239 union_float32 ua
, ub
, uc
, ur
;
2245 if (unlikely(!can_use_fpu(s
))) {
2248 if (unlikely(flags
& float_muladd_halve_result
)) {
2252 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2253 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2257 if (unlikely(force_soft_fma
)) {
2262 * When (a || b) == 0, there's no need to check for under/over flow,
2263 * since we know the addend is (normal || 0) and the product is 0.
2265 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2269 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2270 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2271 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2273 if (flags
& float_muladd_negate_c
) {
2278 union_float32 ua_orig
= ua
;
2279 union_float32 uc_orig
= uc
;
2281 if (flags
& float_muladd_negate_product
) {
2284 if (flags
& float_muladd_negate_c
) {
2288 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2290 if (unlikely(f32_is_inf(ur
))) {
2291 float_raise(float_flag_overflow
, s
);
2292 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2298 if (flags
& float_muladd_negate_result
) {
2299 return float32_chs(ur
.s
);
2304 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2307 float64 QEMU_FLATTEN
2308 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2310 union_float64 ua
, ub
, uc
, ur
;
2316 if (unlikely(!can_use_fpu(s
))) {
2319 if (unlikely(flags
& float_muladd_halve_result
)) {
2323 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2324 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2328 if (unlikely(force_soft_fma
)) {
2333 * When (a || b) == 0, there's no need to check for under/over flow,
2334 * since we know the addend is (normal || 0) and the product is 0.
2336 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2340 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2341 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2342 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2344 if (flags
& float_muladd_negate_c
) {
2349 union_float64 ua_orig
= ua
;
2350 union_float64 uc_orig
= uc
;
2352 if (flags
& float_muladd_negate_product
) {
2355 if (flags
& float_muladd_negate_c
) {
2359 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2361 if (unlikely(f64_is_inf(ur
))) {
2362 float_raise(float_flag_overflow
, s
);
2363 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2369 if (flags
& float_muladd_negate_result
) {
2370 return float64_chs(ur
.s
);
2375 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2378 float64
float64r32_muladd(float64 a
, float64 b
, float64 c
,
2379 int flags
, float_status
*status
)
2381 FloatParts64 pa
, pb
, pc
, *pr
;
2383 float64_unpack_canonical(&pa
, a
, status
);
2384 float64_unpack_canonical(&pb
, b
, status
);
2385 float64_unpack_canonical(&pc
, c
, status
);
2386 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2388 return float64r32_round_pack_canonical(pr
, status
);
2391 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2392 int flags
, float_status
*status
)
2394 FloatParts64 pa
, pb
, pc
, *pr
;
2396 bfloat16_unpack_canonical(&pa
, a
, status
);
2397 bfloat16_unpack_canonical(&pb
, b
, status
);
2398 bfloat16_unpack_canonical(&pc
, c
, status
);
2399 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2401 return bfloat16_round_pack_canonical(pr
, status
);
2404 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2405 int flags
, float_status
*status
)
2407 FloatParts128 pa
, pb
, pc
, *pr
;
2409 float128_unpack_canonical(&pa
, a
, status
);
2410 float128_unpack_canonical(&pb
, b
, status
);
2411 float128_unpack_canonical(&pc
, c
, status
);
2412 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2414 return float128_round_pack_canonical(pr
, status
);
2421 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2423 FloatParts64 pa
, pb
, *pr
;
2425 float16_unpack_canonical(&pa
, a
, status
);
2426 float16_unpack_canonical(&pb
, b
, status
);
2427 pr
= parts_div(&pa
, &pb
, status
);
2429 return float16_round_pack_canonical(pr
, status
);
2432 static float32 QEMU_SOFTFLOAT_ATTR
2433 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2435 FloatParts64 pa
, pb
, *pr
;
2437 float32_unpack_canonical(&pa
, a
, status
);
2438 float32_unpack_canonical(&pb
, b
, status
);
2439 pr
= parts_div(&pa
, &pb
, status
);
2441 return float32_round_pack_canonical(pr
, status
);
2444 static float64 QEMU_SOFTFLOAT_ATTR
2445 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2447 FloatParts64 pa
, pb
, *pr
;
2449 float64_unpack_canonical(&pa
, a
, status
);
2450 float64_unpack_canonical(&pb
, b
, status
);
2451 pr
= parts_div(&pa
, &pb
, status
);
2453 return float64_round_pack_canonical(pr
, status
);
2456 static float hard_f32_div(float a
, float b
)
2461 static double hard_f64_div(double a
, double b
)
2466 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2468 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2469 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2470 fpclassify(b
.h
) == FP_NORMAL
;
2472 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2475 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2477 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2478 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2479 fpclassify(b
.h
) == FP_NORMAL
;
2481 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2484 static bool f32_div_post(union_float32 a
, union_float32 b
)
2486 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2487 return fpclassify(a
.h
) != FP_ZERO
;
2489 return !float32_is_zero(a
.s
);
2492 static bool f64_div_post(union_float64 a
, union_float64 b
)
2494 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2495 return fpclassify(a
.h
) != FP_ZERO
;
2497 return !float64_is_zero(a
.s
);
2500 float32 QEMU_FLATTEN
2501 float32_div(float32 a
, float32 b
, float_status
*s
)
2503 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2504 f32_div_pre
, f32_div_post
);
2507 float64 QEMU_FLATTEN
2508 float64_div(float64 a
, float64 b
, float_status
*s
)
2510 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2511 f64_div_pre
, f64_div_post
);
2514 float64
float64r32_div(float64 a
, float64 b
, float_status
*status
)
2516 FloatParts64 pa
, pb
, *pr
;
2518 float64_unpack_canonical(&pa
, a
, status
);
2519 float64_unpack_canonical(&pb
, b
, status
);
2520 pr
= parts_div(&pa
, &pb
, status
);
2522 return float64r32_round_pack_canonical(pr
, status
);
2525 bfloat16 QEMU_FLATTEN
2526 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2528 FloatParts64 pa
, pb
, *pr
;
2530 bfloat16_unpack_canonical(&pa
, a
, status
);
2531 bfloat16_unpack_canonical(&pb
, b
, status
);
2532 pr
= parts_div(&pa
, &pb
, status
);
2534 return bfloat16_round_pack_canonical(pr
, status
);
2537 float128 QEMU_FLATTEN
2538 float128_div(float128 a
, float128 b
, float_status
*status
)
2540 FloatParts128 pa
, pb
, *pr
;
2542 float128_unpack_canonical(&pa
, a
, status
);
2543 float128_unpack_canonical(&pb
, b
, status
);
2544 pr
= parts_div(&pa
, &pb
, status
);
2546 return float128_round_pack_canonical(pr
, status
);
2549 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
2551 FloatParts128 pa
, pb
, *pr
;
2553 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2554 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2555 return floatx80_default_nan(status
);
2558 pr
= parts_div(&pa
, &pb
, status
);
2559 return floatx80_round_pack_canonical(pr
, status
);
2566 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
2568 FloatParts64 pa
, pb
, *pr
;
2570 float32_unpack_canonical(&pa
, a
, status
);
2571 float32_unpack_canonical(&pb
, b
, status
);
2572 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2574 return float32_round_pack_canonical(pr
, status
);
2577 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
2579 FloatParts64 pa
, pb
, *pr
;
2581 float64_unpack_canonical(&pa
, a
, status
);
2582 float64_unpack_canonical(&pb
, b
, status
);
2583 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2585 return float64_round_pack_canonical(pr
, status
);
2588 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
2590 FloatParts128 pa
, pb
, *pr
;
2592 float128_unpack_canonical(&pa
, a
, status
);
2593 float128_unpack_canonical(&pb
, b
, status
);
2594 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2596 return float128_round_pack_canonical(pr
, status
);
2600 * Returns the remainder of the extended double-precision floating-point value
2601 * `a' with respect to the corresponding value `b'.
2602 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2603 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2604 * the remainder based on truncating the quotient toward zero instead and
2605 * *quotient is set to the low 64 bits of the absolute value of the integer
2608 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
,
2609 uint64_t *quotient
, float_status
*status
)
2611 FloatParts128 pa
, pb
, *pr
;
2614 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2615 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2616 return floatx80_default_nan(status
);
2618 pr
= parts_modrem(&pa
, &pb
, mod
? quotient
: NULL
, status
);
2620 return floatx80_round_pack_canonical(pr
, status
);
2623 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
2626 return floatx80_modrem(a
, b
, false, "ient
, status
);
2629 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
2632 return floatx80_modrem(a
, b
, true, "ient
, status
);
2636 * Float to Float conversions
2638 * Returns the result of converting one float format to another. The
2639 * conversion is performed according to the IEC/IEEE Standard for
2640 * Binary Floating-Point Arithmetic.
2642 * Usually this only needs to take care of raising invalid exceptions
2643 * and handling the conversion on NaNs.
2646 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2649 case float_class_snan
:
2650 float_raise(float_flag_invalid_snan
, s
);
2652 case float_class_qnan
:
2654 * There is no NaN in the destination format. Raise Invalid
2655 * and return a zero with the sign of the input NaN.
2657 float_raise(float_flag_invalid
, s
);
2658 a
->cls
= float_class_zero
;
2661 case float_class_inf
:
2663 * There is no Inf in the destination format. Raise Invalid
2664 * and return the maximum normal with the correct sign.
2666 float_raise(float_flag_invalid
, s
);
2667 a
->cls
= float_class_normal
;
2668 a
->exp
= float16_params_ahp
.exp_max
;
2669 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2670 float16_params_ahp
.frac_size
+ 1);
2673 case float_class_normal
:
2674 case float_class_zero
:
2678 g_assert_not_reached();
2682 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2684 if (is_nan(a
->cls
)) {
2685 parts_return_nan(a
, s
);
2689 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2691 if (is_nan(a
->cls
)) {
2692 parts_return_nan(a
, s
);
2696 #define parts_float_to_float(P, S) \
2697 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2699 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2706 if (a
->cls
== float_class_normal
) {
2707 frac_truncjam(a
, b
);
2708 } else if (is_nan(a
->cls
)) {
2709 /* Discard the low bits of the NaN. */
2710 a
->frac
= b
->frac_hi
;
2711 parts_return_nan(a
, s
);
2715 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2723 if (is_nan(a
->cls
)) {
2724 parts_return_nan(a
, s
);
2728 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2730 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2733 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2734 parts_float_to_float(&p
, s
);
2735 return float32_round_pack_canonical(&p
, s
);
2738 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2740 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2743 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2744 parts_float_to_float(&p
, s
);
2745 return float64_round_pack_canonical(&p
, s
);
2748 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2751 const FloatFmt
*fmt
;
2753 float32_unpack_canonical(&p
, a
, s
);
2755 parts_float_to_float(&p
, s
);
2756 fmt
= &float16_params
;
2758 parts_float_to_ahp(&p
, s
);
2759 fmt
= &float16_params_ahp
;
2761 return float16a_round_pack_canonical(&p
, s
, fmt
);
2764 static float64 QEMU_SOFTFLOAT_ATTR
2765 soft_float32_to_float64(float32 a
, float_status
*s
)
2769 float32_unpack_canonical(&p
, a
, s
);
2770 parts_float_to_float(&p
, s
);
2771 return float64_round_pack_canonical(&p
, s
);
2774 float64
float32_to_float64(float32 a
, float_status
*s
)
2776 if (likely(float32_is_normal(a
))) {
2777 /* Widening conversion can never produce inexact results. */
2783 } else if (float32_is_zero(a
)) {
2784 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2786 return soft_float32_to_float64(a
, s
);
2790 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2793 const FloatFmt
*fmt
;
2795 float64_unpack_canonical(&p
, a
, s
);
2797 parts_float_to_float(&p
, s
);
2798 fmt
= &float16_params
;
2800 parts_float_to_ahp(&p
, s
);
2801 fmt
= &float16_params_ahp
;
2803 return float16a_round_pack_canonical(&p
, s
, fmt
);
2806 float32
float64_to_float32(float64 a
, float_status
*s
)
2810 float64_unpack_canonical(&p
, a
, s
);
2811 parts_float_to_float(&p
, s
);
2812 return float32_round_pack_canonical(&p
, s
);
2815 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2819 bfloat16_unpack_canonical(&p
, a
, s
);
2820 parts_float_to_float(&p
, s
);
2821 return float32_round_pack_canonical(&p
, s
);
2824 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2828 bfloat16_unpack_canonical(&p
, a
, s
);
2829 parts_float_to_float(&p
, s
);
2830 return float64_round_pack_canonical(&p
, s
);
2833 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2837 float32_unpack_canonical(&p
, a
, s
);
2838 parts_float_to_float(&p
, s
);
2839 return bfloat16_round_pack_canonical(&p
, s
);
2842 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2846 float64_unpack_canonical(&p
, a
, s
);
2847 parts_float_to_float(&p
, s
);
2848 return bfloat16_round_pack_canonical(&p
, s
);
2851 float32
float128_to_float32(float128 a
, float_status
*s
)
2856 float128_unpack_canonical(&p128
, a
, s
);
2857 parts_float_to_float_narrow(&p64
, &p128
, s
);
2858 return float32_round_pack_canonical(&p64
, s
);
2861 float64
float128_to_float64(float128 a
, float_status
*s
)
2866 float128_unpack_canonical(&p128
, a
, s
);
2867 parts_float_to_float_narrow(&p64
, &p128
, s
);
2868 return float64_round_pack_canonical(&p64
, s
);
2871 float128
float32_to_float128(float32 a
, float_status
*s
)
2876 float32_unpack_canonical(&p64
, a
, s
);
2877 parts_float_to_float_widen(&p128
, &p64
, s
);
2878 return float128_round_pack_canonical(&p128
, s
);
2881 float128
float64_to_float128(float64 a
, float_status
*s
)
2886 float64_unpack_canonical(&p64
, a
, s
);
2887 parts_float_to_float_widen(&p128
, &p64
, s
);
2888 return float128_round_pack_canonical(&p128
, s
);
2891 float32
floatx80_to_float32(floatx80 a
, float_status
*s
)
2896 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2897 parts_float_to_float_narrow(&p64
, &p128
, s
);
2899 parts_default_nan(&p64
, s
);
2901 return float32_round_pack_canonical(&p64
, s
);
2904 float64
floatx80_to_float64(floatx80 a
, float_status
*s
)
2909 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2910 parts_float_to_float_narrow(&p64
, &p128
, s
);
2912 parts_default_nan(&p64
, s
);
2914 return float64_round_pack_canonical(&p64
, s
);
2917 float128
floatx80_to_float128(floatx80 a
, float_status
*s
)
2921 if (floatx80_unpack_canonical(&p
, a
, s
)) {
2922 parts_float_to_float(&p
, s
);
2924 parts_default_nan(&p
, s
);
2926 return float128_round_pack_canonical(&p
, s
);
2929 floatx80
float32_to_floatx80(float32 a
, float_status
*s
)
2934 float32_unpack_canonical(&p64
, a
, s
);
2935 parts_float_to_float_widen(&p128
, &p64
, s
);
2936 return floatx80_round_pack_canonical(&p128
, s
);
2939 floatx80
float64_to_floatx80(float64 a
, float_status
*s
)
2944 float64_unpack_canonical(&p64
, a
, s
);
2945 parts_float_to_float_widen(&p128
, &p64
, s
);
2946 return floatx80_round_pack_canonical(&p128
, s
);
2949 floatx80
float128_to_floatx80(float128 a
, float_status
*s
)
2953 float128_unpack_canonical(&p
, a
, s
);
2954 parts_float_to_float(&p
, s
);
2955 return floatx80_round_pack_canonical(&p
, s
);
2959 * Round to integral value
2962 float16
float16_round_to_int(float16 a
, float_status
*s
)
2966 float16_unpack_canonical(&p
, a
, s
);
2967 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2968 return float16_round_pack_canonical(&p
, s
);
2971 float32
float32_round_to_int(float32 a
, float_status
*s
)
2975 float32_unpack_canonical(&p
, a
, s
);
2976 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2977 return float32_round_pack_canonical(&p
, s
);
2980 float64
float64_round_to_int(float64 a
, float_status
*s
)
2984 float64_unpack_canonical(&p
, a
, s
);
2985 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2986 return float64_round_pack_canonical(&p
, s
);
2989 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2993 bfloat16_unpack_canonical(&p
, a
, s
);
2994 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2995 return bfloat16_round_pack_canonical(&p
, s
);
2998 float128
float128_round_to_int(float128 a
, float_status
*s
)
3002 float128_unpack_canonical(&p
, a
, s
);
3003 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
3004 return float128_round_pack_canonical(&p
, s
);
3007 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
3011 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
3012 return floatx80_default_nan(status
);
3015 parts_round_to_int(&p
, status
->float_rounding_mode
, 0, status
,
3016 &floatx80_params
[status
->floatx80_rounding_precision
]);
3017 return floatx80_round_pack_canonical(&p
, status
);
3021 * Floating-point to signed integer conversions
3024 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3029 float16_unpack_canonical(&p
, a
, s
);
3030 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
3033 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3038 float16_unpack_canonical(&p
, a
, s
);
3039 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3042 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3047 float16_unpack_canonical(&p
, a
, s
);
3048 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3051 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3056 float16_unpack_canonical(&p
, a
, s
);
3057 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3060 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3065 float32_unpack_canonical(&p
, a
, s
);
3066 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3069 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3074 float32_unpack_canonical(&p
, a
, s
);
3075 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3078 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3083 float32_unpack_canonical(&p
, a
, s
);
3084 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3087 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3092 float64_unpack_canonical(&p
, a
, s
);
3093 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3096 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3101 float64_unpack_canonical(&p
, a
, s
);
3102 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3105 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3110 float64_unpack_canonical(&p
, a
, s
);
3111 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3114 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3119 bfloat16_unpack_canonical(&p
, a
, s
);
3120 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3123 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3128 bfloat16_unpack_canonical(&p
, a
, s
);
3129 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3132 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3137 bfloat16_unpack_canonical(&p
, a
, s
);
3138 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3141 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
3142 int scale
, float_status
*s
)
3146 float128_unpack_canonical(&p
, a
, s
);
3147 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3150 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
3151 int scale
, float_status
*s
)
3155 float128_unpack_canonical(&p
, a
, s
);
3156 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3159 static Int128
float128_to_int128_scalbn(float128 a
, FloatRoundMode rmode
,
3160 int scale
, float_status
*s
)
3166 float128_unpack_canonical(&p
, a
, s
);
3169 case float_class_snan
:
3170 flags
|= float_flag_invalid_snan
;
3172 case float_class_qnan
:
3173 flags
|= float_flag_invalid
;
3177 case float_class_inf
:
3178 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3179 r
= p
.sign
? INT128_MIN
: INT128_MAX
;
3182 case float_class_zero
:
3183 return int128_zero();
3185 case float_class_normal
:
3186 if (parts_round_to_int_normal(&p
, rmode
, scale
, 128 - 2)) {
3187 flags
= float_flag_inexact
;
3191 int shift
= 127 - p
.exp
;
3192 r
= int128_urshift(int128_make128(p
.frac_lo
, p
.frac_hi
), shift
);
3196 } else if (p
.exp
== 127 && p
.sign
&& p
.frac_lo
== 0 &&
3197 p
.frac_hi
== DECOMPOSED_IMPLICIT_BIT
) {
3200 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3201 r
= p
.sign
? INT128_MIN
: INT128_MAX
;
3206 g_assert_not_reached();
3209 float_raise(flags
, s
);
3213 static int32_t floatx80_to_int32_scalbn(floatx80 a
, FloatRoundMode rmode
,
3214 int scale
, float_status
*s
)
3218 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3219 parts_default_nan(&p
, s
);
3221 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3224 static int64_t floatx80_to_int64_scalbn(floatx80 a
, FloatRoundMode rmode
,
3225 int scale
, float_status
*s
)
3229 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3230 parts_default_nan(&p
, s
);
3232 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3235 int8_t float16_to_int8(float16 a
, float_status
*s
)
3237 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3240 int16_t float16_to_int16(float16 a
, float_status
*s
)
3242 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3245 int32_t float16_to_int32(float16 a
, float_status
*s
)
3247 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3250 int64_t float16_to_int64(float16 a
, float_status
*s
)
3252 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3255 int16_t float32_to_int16(float32 a
, float_status
*s
)
3257 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3260 int32_t float32_to_int32(float32 a
, float_status
*s
)
3262 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3265 int64_t float32_to_int64(float32 a
, float_status
*s
)
3267 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3270 int16_t float64_to_int16(float64 a
, float_status
*s
)
3272 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3275 int32_t float64_to_int32(float64 a
, float_status
*s
)
3277 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3280 int64_t float64_to_int64(float64 a
, float_status
*s
)
3282 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3285 int32_t float128_to_int32(float128 a
, float_status
*s
)
3287 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3290 int64_t float128_to_int64(float128 a
, float_status
*s
)
3292 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3295 Int128
float128_to_int128(float128 a
, float_status
*s
)
3297 return float128_to_int128_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3300 int32_t floatx80_to_int32(floatx80 a
, float_status
*s
)
3302 return floatx80_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3305 int64_t floatx80_to_int64(floatx80 a
, float_status
*s
)
3307 return floatx80_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3310 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
3312 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3315 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
3317 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3320 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
3322 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3325 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
3327 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3330 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
3332 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3335 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
3337 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3340 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
3342 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3345 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
3347 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3350 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
3352 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3355 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
3357 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3360 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
3362 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3365 Int128
float128_to_int128_round_to_zero(float128 a
, float_status
*s
)
3367 return float128_to_int128_scalbn(a
, float_round_to_zero
, 0, s
);
3370 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*s
)
3372 return floatx80_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3375 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*s
)
3377 return floatx80_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3380 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
3382 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3385 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
3387 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3390 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
3392 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3395 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
3397 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3400 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
3402 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3405 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
3407 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3411 * Floating-point to unsigned integer conversions
3414 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3419 float16_unpack_canonical(&p
, a
, s
);
3420 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
3423 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3428 float16_unpack_canonical(&p
, a
, s
);
3429 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3432 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3437 float16_unpack_canonical(&p
, a
, s
);
3438 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3441 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3446 float16_unpack_canonical(&p
, a
, s
);
3447 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3450 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3455 float32_unpack_canonical(&p
, a
, s
);
3456 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3459 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3464 float32_unpack_canonical(&p
, a
, s
);
3465 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3468 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3473 float32_unpack_canonical(&p
, a
, s
);
3474 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3477 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3482 float64_unpack_canonical(&p
, a
, s
);
3483 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3486 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3491 float64_unpack_canonical(&p
, a
, s
);
3492 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3495 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3500 float64_unpack_canonical(&p
, a
, s
);
3501 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3504 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3505 int scale
, float_status
*s
)
3509 bfloat16_unpack_canonical(&p
, a
, s
);
3510 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3513 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3514 int scale
, float_status
*s
)
3518 bfloat16_unpack_canonical(&p
, a
, s
);
3519 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3522 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3523 int scale
, float_status
*s
)
3527 bfloat16_unpack_canonical(&p
, a
, s
);
3528 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3531 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3532 int scale
, float_status
*s
)
3536 float128_unpack_canonical(&p
, a
, s
);
3537 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3540 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3541 int scale
, float_status
*s
)
3545 float128_unpack_canonical(&p
, a
, s
);
3546 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3549 static Int128
float128_to_uint128_scalbn(float128 a
, FloatRoundMode rmode
,
3550 int scale
, float_status
*s
)
3556 float128_unpack_canonical(&p
, a
, s
);
3559 case float_class_snan
:
3560 flags
|= float_flag_invalid_snan
;
3562 case float_class_qnan
:
3563 flags
|= float_flag_invalid
;
3567 case float_class_inf
:
3568 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3569 r
= p
.sign
? int128_zero() : UINT128_MAX
;
3572 case float_class_zero
:
3573 return int128_zero();
3575 case float_class_normal
:
3576 if (parts_round_to_int_normal(&p
, rmode
, scale
, 128 - 2)) {
3577 flags
= float_flag_inexact
;
3578 if (p
.cls
== float_class_zero
) {
3585 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3587 } else if (p
.exp
<= 127) {
3588 int shift
= 127 - p
.exp
;
3589 r
= int128_urshift(int128_make128(p
.frac_lo
, p
.frac_hi
), shift
);
3591 flags
= float_flag_invalid
| float_flag_invalid_cvti
;
3597 g_assert_not_reached();
3600 float_raise(flags
, s
);
3604 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3606 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3609 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3611 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3614 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3616 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3619 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3621 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3624 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3626 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3629 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3631 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3634 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3636 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3639 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3641 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3644 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3646 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3649 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3651 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3654 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3656 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3659 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3661 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3664 Int128
float128_to_uint128(float128 a
, float_status
*s
)
3666 return float128_to_uint128_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3669 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3671 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3674 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3676 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3679 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3681 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3684 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3686 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3689 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3691 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3694 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3696 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3699 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3701 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3704 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3706 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3709 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3711 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3714 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3716 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3719 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3721 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3724 Int128
float128_to_uint128_round_to_zero(float128 a
, float_status
*s
)
3726 return float128_to_uint128_scalbn(a
, float_round_to_zero
, 0, s
);
3729 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3731 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3734 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3736 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3739 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3741 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3744 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3746 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3749 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3751 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3754 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3756 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3760 * Signed integer to floating-point conversions
3763 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3767 parts_sint_to_float(&p
, a
, scale
, status
);
3768 return float16_round_pack_canonical(&p
, status
);
3771 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3773 return int64_to_float16_scalbn(a
, scale
, status
);
3776 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3778 return int64_to_float16_scalbn(a
, scale
, status
);
3781 float16
int64_to_float16(int64_t a
, float_status
*status
)
3783 return int64_to_float16_scalbn(a
, 0, status
);
3786 float16
int32_to_float16(int32_t a
, float_status
*status
)
3788 return int64_to_float16_scalbn(a
, 0, status
);
3791 float16
int16_to_float16(int16_t a
, float_status
*status
)
3793 return int64_to_float16_scalbn(a
, 0, status
);
3796 float16
int8_to_float16(int8_t a
, float_status
*status
)
3798 return int64_to_float16_scalbn(a
, 0, status
);
3801 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3805 /* Without scaling, there are no overflow concerns. */
3806 if (likely(scale
== 0) && can_use_fpu(status
)) {
3812 parts64_sint_to_float(&p
, a
, scale
, status
);
3813 return float32_round_pack_canonical(&p
, status
);
3816 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3818 return int64_to_float32_scalbn(a
, scale
, status
);
3821 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3823 return int64_to_float32_scalbn(a
, scale
, status
);
3826 float32
int64_to_float32(int64_t a
, float_status
*status
)
3828 return int64_to_float32_scalbn(a
, 0, status
);
3831 float32
int32_to_float32(int32_t a
, float_status
*status
)
3833 return int64_to_float32_scalbn(a
, 0, status
);
3836 float32
int16_to_float32(int16_t a
, float_status
*status
)
3838 return int64_to_float32_scalbn(a
, 0, status
);
3841 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3845 /* Without scaling, there are no overflow concerns. */
3846 if (likely(scale
== 0) && can_use_fpu(status
)) {
3852 parts_sint_to_float(&p
, a
, scale
, status
);
3853 return float64_round_pack_canonical(&p
, status
);
3856 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3858 return int64_to_float64_scalbn(a
, scale
, status
);
3861 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3863 return int64_to_float64_scalbn(a
, scale
, status
);
3866 float64
int64_to_float64(int64_t a
, float_status
*status
)
3868 return int64_to_float64_scalbn(a
, 0, status
);
3871 float64
int32_to_float64(int32_t a
, float_status
*status
)
3873 return int64_to_float64_scalbn(a
, 0, status
);
3876 float64
int16_to_float64(int16_t a
, float_status
*status
)
3878 return int64_to_float64_scalbn(a
, 0, status
);
3881 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3885 parts_sint_to_float(&p
, a
, scale
, status
);
3886 return bfloat16_round_pack_canonical(&p
, status
);
3889 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3891 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3894 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3896 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3899 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3901 return int64_to_bfloat16_scalbn(a
, 0, status
);
3904 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3906 return int64_to_bfloat16_scalbn(a
, 0, status
);
3909 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3911 return int64_to_bfloat16_scalbn(a
, 0, status
);
3914 float128
int128_to_float128(Int128 a
, float_status
*status
)
3916 FloatParts128 p
= { };
3920 p
.cls
= float_class_normal
;
3921 if (!int128_nonneg(a
)) {
3926 shift
= clz64(int128_gethi(a
));
3928 shift
+= clz64(int128_getlo(a
));
3931 p
.exp
= 127 - shift
;
3932 a
= int128_lshift(a
, shift
);
3934 p
.frac_hi
= int128_gethi(a
);
3935 p
.frac_lo
= int128_getlo(a
);
3937 p
.cls
= float_class_zero
;
3940 return float128_round_pack_canonical(&p
, status
);
3943 float128
int64_to_float128(int64_t a
, float_status
*status
)
3947 parts_sint_to_float(&p
, a
, 0, status
);
3948 return float128_round_pack_canonical(&p
, status
);
3951 float128
int32_to_float128(int32_t a
, float_status
*status
)
3953 return int64_to_float128(a
, status
);
3956 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
3960 parts_sint_to_float(&p
, a
, 0, status
);
3961 return floatx80_round_pack_canonical(&p
, status
);
3964 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
3966 return int64_to_floatx80(a
, status
);
3970 * Unsigned Integer to floating-point conversions
3973 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3977 parts_uint_to_float(&p
, a
, scale
, status
);
3978 return float16_round_pack_canonical(&p
, status
);
3981 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3983 return uint64_to_float16_scalbn(a
, scale
, status
);
3986 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3988 return uint64_to_float16_scalbn(a
, scale
, status
);
3991 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3993 return uint64_to_float16_scalbn(a
, 0, status
);
3996 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3998 return uint64_to_float16_scalbn(a
, 0, status
);
4001 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
4003 return uint64_to_float16_scalbn(a
, 0, status
);
4006 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
4008 return uint64_to_float16_scalbn(a
, 0, status
);
4011 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
4015 /* Without scaling, there are no overflow concerns. */
4016 if (likely(scale
== 0) && can_use_fpu(status
)) {
4022 parts_uint_to_float(&p
, a
, scale
, status
);
4023 return float32_round_pack_canonical(&p
, status
);
4026 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
4028 return uint64_to_float32_scalbn(a
, scale
, status
);
4031 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
4033 return uint64_to_float32_scalbn(a
, scale
, status
);
4036 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
4038 return uint64_to_float32_scalbn(a
, 0, status
);
4041 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
4043 return uint64_to_float32_scalbn(a
, 0, status
);
4046 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
4048 return uint64_to_float32_scalbn(a
, 0, status
);
4051 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
4055 /* Without scaling, there are no overflow concerns. */
4056 if (likely(scale
== 0) && can_use_fpu(status
)) {
4062 parts_uint_to_float(&p
, a
, scale
, status
);
4063 return float64_round_pack_canonical(&p
, status
);
4066 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
4068 return uint64_to_float64_scalbn(a
, scale
, status
);
4071 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
4073 return uint64_to_float64_scalbn(a
, scale
, status
);
4076 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
4078 return uint64_to_float64_scalbn(a
, 0, status
);
4081 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
4083 return uint64_to_float64_scalbn(a
, 0, status
);
4086 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
4088 return uint64_to_float64_scalbn(a
, 0, status
);
4091 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
4095 parts_uint_to_float(&p
, a
, scale
, status
);
4096 return bfloat16_round_pack_canonical(&p
, status
);
4099 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
4101 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
4104 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
4106 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
4109 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
4111 return uint64_to_bfloat16_scalbn(a
, 0, status
);
4114 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
4116 return uint64_to_bfloat16_scalbn(a
, 0, status
);
4119 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
4121 return uint64_to_bfloat16_scalbn(a
, 0, status
);
4124 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
4128 parts_uint_to_float(&p
, a
, 0, status
);
4129 return float128_round_pack_canonical(&p
, status
);
4132 float128
uint128_to_float128(Int128 a
, float_status
*status
)
4134 FloatParts128 p
= { };
4138 p
.cls
= float_class_normal
;
4140 shift
= clz64(int128_gethi(a
));
4142 shift
+= clz64(int128_getlo(a
));
4145 p
.exp
= 127 - shift
;
4146 a
= int128_lshift(a
, shift
);
4148 p
.frac_hi
= int128_gethi(a
);
4149 p
.frac_lo
= int128_getlo(a
);
4151 p
.cls
= float_class_zero
;
4154 return float128_round_pack_canonical(&p
, status
);
4158 * Minimum and maximum
4161 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
4163 FloatParts64 pa
, pb
, *pr
;
4165 float16_unpack_canonical(&pa
, a
, s
);
4166 float16_unpack_canonical(&pb
, b
, s
);
4167 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
4169 return float16_round_pack_canonical(pr
, s
);
4172 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
4173 float_status
*s
, int flags
)
4175 FloatParts64 pa
, pb
, *pr
;
4177 bfloat16_unpack_canonical(&pa
, a
, s
);
4178 bfloat16_unpack_canonical(&pb
, b
, s
);
4179 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
4181 return bfloat16_round_pack_canonical(pr
, s
);
4184 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
4186 FloatParts64 pa
, pb
, *pr
;
4188 float32_unpack_canonical(&pa
, a
, s
);
4189 float32_unpack_canonical(&pb
, b
, s
);
4190 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
4192 return float32_round_pack_canonical(pr
, s
);
4195 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
4197 FloatParts64 pa
, pb
, *pr
;
4199 float64_unpack_canonical(&pa
, a
, s
);
4200 float64_unpack_canonical(&pb
, b
, s
);
4201 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
4203 return float64_round_pack_canonical(pr
, s
);
4206 static float128
float128_minmax(float128 a
, float128 b
,
4207 float_status
*s
, int flags
)
4209 FloatParts128 pa
, pb
, *pr
;
4211 float128_unpack_canonical(&pa
, a
, s
);
4212 float128_unpack_canonical(&pb
, b
, s
);
4213 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
4215 return float128_round_pack_canonical(pr
, s
);
4218 #define MINMAX_1(type, name, flags) \
4219 type type##_##name(type a, type b, float_status *s) \
4220 { return type##_minmax(a, b, s, flags); }
4222 #define MINMAX_2(type) \
4223 MINMAX_1(type, max, 0) \
4224 MINMAX_1(type, maxnum, minmax_isnum) \
4225 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
4226 MINMAX_1(type, maximum_number, minmax_isnumber) \
4227 MINMAX_1(type, min, minmax_ismin) \
4228 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
4229 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4230 MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \
4242 * Floating point compare
4245 static FloatRelation QEMU_FLATTEN
4246 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
4248 FloatParts64 pa
, pb
;
4250 float16_unpack_canonical(&pa
, a
, s
);
4251 float16_unpack_canonical(&pb
, b
, s
);
4252 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4255 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
4257 return float16_do_compare(a
, b
, s
, false);
4260 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
4262 return float16_do_compare(a
, b
, s
, true);
4265 static FloatRelation QEMU_SOFTFLOAT_ATTR
4266 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
4268 FloatParts64 pa
, pb
;
4270 float32_unpack_canonical(&pa
, a
, s
);
4271 float32_unpack_canonical(&pb
, b
, s
);
4272 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4275 static FloatRelation QEMU_FLATTEN
4276 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
4278 union_float32 ua
, ub
;
4283 if (QEMU_NO_HARDFLOAT
) {
4287 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
4288 if (isgreaterequal(ua
.h
, ub
.h
)) {
4289 if (isgreater(ua
.h
, ub
.h
)) {
4290 return float_relation_greater
;
4292 return float_relation_equal
;
4294 if (likely(isless(ua
.h
, ub
.h
))) {
4295 return float_relation_less
;
4298 * The only condition remaining is unordered.
4299 * Fall through to set flags.
4302 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4305 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
4307 return float32_hs_compare(a
, b
, s
, false);
4310 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
4312 return float32_hs_compare(a
, b
, s
, true);
4315 static FloatRelation QEMU_SOFTFLOAT_ATTR
4316 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
4318 FloatParts64 pa
, pb
;
4320 float64_unpack_canonical(&pa
, a
, s
);
4321 float64_unpack_canonical(&pb
, b
, s
);
4322 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4325 static FloatRelation QEMU_FLATTEN
4326 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
4328 union_float64 ua
, ub
;
4333 if (QEMU_NO_HARDFLOAT
) {
4337 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
4338 if (isgreaterequal(ua
.h
, ub
.h
)) {
4339 if (isgreater(ua
.h
, ub
.h
)) {
4340 return float_relation_greater
;
4342 return float_relation_equal
;
4344 if (likely(isless(ua
.h
, ub
.h
))) {
4345 return float_relation_less
;
4348 * The only condition remaining is unordered.
4349 * Fall through to set flags.
4352 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4355 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
4357 return float64_hs_compare(a
, b
, s
, false);
4360 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
4362 return float64_hs_compare(a
, b
, s
, true);
4365 static FloatRelation QEMU_FLATTEN
4366 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
4368 FloatParts64 pa
, pb
;
4370 bfloat16_unpack_canonical(&pa
, a
, s
);
4371 bfloat16_unpack_canonical(&pb
, b
, s
);
4372 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4375 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
4377 return bfloat16_do_compare(a
, b
, s
, false);
4380 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
4382 return bfloat16_do_compare(a
, b
, s
, true);
4385 static FloatRelation QEMU_FLATTEN
4386 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
4388 FloatParts128 pa
, pb
;
4390 float128_unpack_canonical(&pa
, a
, s
);
4391 float128_unpack_canonical(&pb
, b
, s
);
4392 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4395 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
4397 return float128_do_compare(a
, b
, s
, false);
4400 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
4402 return float128_do_compare(a
, b
, s
, true);
4405 static FloatRelation QEMU_FLATTEN
4406 floatx80_do_compare(floatx80 a
, floatx80 b
, float_status
*s
, bool is_quiet
)
4408 FloatParts128 pa
, pb
;
4410 if (!floatx80_unpack_canonical(&pa
, a
, s
) ||
4411 !floatx80_unpack_canonical(&pb
, b
, s
)) {
4412 return float_relation_unordered
;
4414 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4417 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*s
)
4419 return floatx80_do_compare(a
, b
, s
, false);
4422 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
, float_status
*s
)
4424 return floatx80_do_compare(a
, b
, s
, true);
4431 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
4435 float16_unpack_canonical(&p
, a
, status
);
4436 parts_scalbn(&p
, n
, status
);
4437 return float16_round_pack_canonical(&p
, status
);
4440 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
4444 float32_unpack_canonical(&p
, a
, status
);
4445 parts_scalbn(&p
, n
, status
);
4446 return float32_round_pack_canonical(&p
, status
);
4449 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
4453 float64_unpack_canonical(&p
, a
, status
);
4454 parts_scalbn(&p
, n
, status
);
4455 return float64_round_pack_canonical(&p
, status
);
4458 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
4462 bfloat16_unpack_canonical(&p
, a
, status
);
4463 parts_scalbn(&p
, n
, status
);
4464 return bfloat16_round_pack_canonical(&p
, status
);
4467 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
4471 float128_unpack_canonical(&p
, a
, status
);
4472 parts_scalbn(&p
, n
, status
);
4473 return float128_round_pack_canonical(&p
, status
);
4476 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
4480 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
4481 return floatx80_default_nan(status
);
4483 parts_scalbn(&p
, n
, status
);
4484 return floatx80_round_pack_canonical(&p
, status
);
4491 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
4495 float16_unpack_canonical(&p
, a
, status
);
4496 parts_sqrt(&p
, status
, &float16_params
);
4497 return float16_round_pack_canonical(&p
, status
);
4500 static float32 QEMU_SOFTFLOAT_ATTR
4501 soft_f32_sqrt(float32 a
, float_status
*status
)
4505 float32_unpack_canonical(&p
, a
, status
);
4506 parts_sqrt(&p
, status
, &float32_params
);
4507 return float32_round_pack_canonical(&p
, status
);
4510 static float64 QEMU_SOFTFLOAT_ATTR
4511 soft_f64_sqrt(float64 a
, float_status
*status
)
4515 float64_unpack_canonical(&p
, a
, status
);
4516 parts_sqrt(&p
, status
, &float64_params
);
4517 return float64_round_pack_canonical(&p
, status
);
4520 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
4522 union_float32 ua
, ur
;
4525 if (unlikely(!can_use_fpu(s
))) {
4529 float32_input_flush1(&ua
.s
, s
);
4530 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
4531 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
4532 fpclassify(ua
.h
) == FP_ZERO
) ||
4536 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
4537 float32_is_neg(ua
.s
))) {
4544 return soft_f32_sqrt(ua
.s
, s
);
4547 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
4549 union_float64 ua
, ur
;
4552 if (unlikely(!can_use_fpu(s
))) {
4556 float64_input_flush1(&ua
.s
, s
);
4557 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
4558 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
4559 fpclassify(ua
.h
) == FP_ZERO
) ||
4563 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
4564 float64_is_neg(ua
.s
))) {
4571 return soft_f64_sqrt(ua
.s
, s
);
4574 float64
float64r32_sqrt(float64 a
, float_status
*status
)
4578 float64_unpack_canonical(&p
, a
, status
);
4579 parts_sqrt(&p
, status
, &float64_params
);
4580 return float64r32_round_pack_canonical(&p
, status
);
4583 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
4587 bfloat16_unpack_canonical(&p
, a
, status
);
4588 parts_sqrt(&p
, status
, &bfloat16_params
);
4589 return bfloat16_round_pack_canonical(&p
, status
);
4592 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
4596 float128_unpack_canonical(&p
, a
, status
);
4597 parts_sqrt(&p
, status
, &float128_params
);
4598 return float128_round_pack_canonical(&p
, status
);
4601 floatx80
floatx80_sqrt(floatx80 a
, float_status
*s
)
4605 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
4606 return floatx80_default_nan(s
);
4608 parts_sqrt(&p
, s
, &floatx80_params
[s
->floatx80_rounding_precision
]);
4609 return floatx80_round_pack_canonical(&p
, s
);
4615 float32
float32_log2(float32 a
, float_status
*status
)
4619 float32_unpack_canonical(&p
, a
, status
);
4620 parts_log2(&p
, status
, &float32_params
);
4621 return float32_round_pack_canonical(&p
, status
);
4624 float64
float64_log2(float64 a
, float_status
*status
)
4628 float64_unpack_canonical(&p
, a
, status
);
4629 parts_log2(&p
, status
, &float64_params
);
4630 return float64_round_pack_canonical(&p
, status
);
4633 /*----------------------------------------------------------------------------
4634 | The pattern for a default generated NaN.
4635 *----------------------------------------------------------------------------*/
4637 float16
float16_default_nan(float_status
*status
)
4641 parts_default_nan(&p
, status
);
4642 p
.frac
>>= float16_params
.frac_shift
;
4643 return float16_pack_raw(&p
);
4646 float32
float32_default_nan(float_status
*status
)
4650 parts_default_nan(&p
, status
);
4651 p
.frac
>>= float32_params
.frac_shift
;
4652 return float32_pack_raw(&p
);
4655 float64
float64_default_nan(float_status
*status
)
4659 parts_default_nan(&p
, status
);
4660 p
.frac
>>= float64_params
.frac_shift
;
4661 return float64_pack_raw(&p
);
4664 float128
float128_default_nan(float_status
*status
)
4668 parts_default_nan(&p
, status
);
4669 frac_shr(&p
, float128_params
.frac_shift
);
4670 return float128_pack_raw(&p
);
4673 bfloat16
bfloat16_default_nan(float_status
*status
)
4677 parts_default_nan(&p
, status
);
4678 p
.frac
>>= bfloat16_params
.frac_shift
;
4679 return bfloat16_pack_raw(&p
);
4682 /*----------------------------------------------------------------------------
4683 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4684 *----------------------------------------------------------------------------*/
4686 float16
float16_silence_nan(float16 a
, float_status
*status
)
4690 float16_unpack_raw(&p
, a
);
4691 p
.frac
<<= float16_params
.frac_shift
;
4692 parts_silence_nan(&p
, status
);
4693 p
.frac
>>= float16_params
.frac_shift
;
4694 return float16_pack_raw(&p
);
4697 float32
float32_silence_nan(float32 a
, float_status
*status
)
4701 float32_unpack_raw(&p
, a
);
4702 p
.frac
<<= float32_params
.frac_shift
;
4703 parts_silence_nan(&p
, status
);
4704 p
.frac
>>= float32_params
.frac_shift
;
4705 return float32_pack_raw(&p
);
4708 float64
float64_silence_nan(float64 a
, float_status
*status
)
4712 float64_unpack_raw(&p
, a
);
4713 p
.frac
<<= float64_params
.frac_shift
;
4714 parts_silence_nan(&p
, status
);
4715 p
.frac
>>= float64_params
.frac_shift
;
4716 return float64_pack_raw(&p
);
4719 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
4723 bfloat16_unpack_raw(&p
, a
);
4724 p
.frac
<<= bfloat16_params
.frac_shift
;
4725 parts_silence_nan(&p
, status
);
4726 p
.frac
>>= bfloat16_params
.frac_shift
;
4727 return bfloat16_pack_raw(&p
);
4730 float128
float128_silence_nan(float128 a
, float_status
*status
)
4734 float128_unpack_raw(&p
, a
);
4735 frac_shl(&p
, float128_params
.frac_shift
);
4736 parts_silence_nan(&p
, status
);
4737 frac_shr(&p
, float128_params
.frac_shift
);
4738 return float128_pack_raw(&p
);
4741 /*----------------------------------------------------------------------------
4742 | If `a' is denormal and we are in flush-to-zero mode then set the
4743 | input-denormal exception and return zero. Otherwise just return the value.
4744 *----------------------------------------------------------------------------*/
4746 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
4748 if (p
.exp
== 0 && p
.frac
!= 0) {
4749 float_raise(float_flag_input_denormal
, status
);
4756 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
4758 if (status
->flush_inputs_to_zero
) {
4761 float16_unpack_raw(&p
, a
);
4762 if (parts_squash_denormal(p
, status
)) {
4763 return float16_set_sign(float16_zero
, p
.sign
);
4769 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4771 if (status
->flush_inputs_to_zero
) {
4774 float32_unpack_raw(&p
, a
);
4775 if (parts_squash_denormal(p
, status
)) {
4776 return float32_set_sign(float32_zero
, p
.sign
);
4782 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4784 if (status
->flush_inputs_to_zero
) {
4787 float64_unpack_raw(&p
, a
);
4788 if (parts_squash_denormal(p
, status
)) {
4789 return float64_set_sign(float64_zero
, p
.sign
);
4795 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4797 if (status
->flush_inputs_to_zero
) {
4800 bfloat16_unpack_raw(&p
, a
);
4801 if (parts_squash_denormal(p
, status
)) {
4802 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4808 /*----------------------------------------------------------------------------
4809 | Normalizes the subnormal extended double-precision floating-point value
4810 | represented by the denormalized significand `aSig'. The normalized exponent
4811 | and significand are stored at the locations pointed to by `zExpPtr' and
4812 | `zSigPtr', respectively.
4813 *----------------------------------------------------------------------------*/
4815 void normalizeFloatx80Subnormal(uint64_t aSig
, int32_t *zExpPtr
,
4820 shiftCount
= clz64(aSig
);
4821 *zSigPtr
= aSig
<<shiftCount
;
4822 *zExpPtr
= 1 - shiftCount
;
4825 /*----------------------------------------------------------------------------
4826 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4827 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4828 | and returns the proper extended double-precision floating-point value
4829 | corresponding to the abstract input. Ordinarily, the abstract value is
4830 | rounded and packed into the extended double-precision format, with the
4831 | inexact exception raised if the abstract input cannot be represented
4832 | exactly. However, if the abstract value is too large, the overflow and
4833 | inexact exceptions are raised and an infinity or maximal finite value is
4834 | returned. If the abstract value is too small, the input value is rounded to
4835 | a subnormal number, and the underflow and inexact exceptions are raised if
4836 | the abstract input cannot be represented exactly as a subnormal extended
4837 | double-precision floating-point number.
4838 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4839 | the result is rounded to the same number of bits as single or double
4840 | precision, respectively. Otherwise, the result is rounded to the full
4841 | precision of the extended double-precision format.
4842 | The input significand must be normalized or smaller. If the input
4843 | significand is not normalized, `zExp' must be 0; in that case, the result
4844 | returned is a subnormal number, and it must not require rounding. The
4845 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4846 | Floating-Point Arithmetic.
4847 *----------------------------------------------------------------------------*/
4849 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4850 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4851 float_status
*status
)
4853 FloatRoundMode roundingMode
;
4854 bool roundNearestEven
, increment
, isTiny
;
4855 int64_t roundIncrement
, roundMask
, roundBits
;
4857 roundingMode
= status
->float_rounding_mode
;
4858 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4859 switch (roundingPrecision
) {
4860 case floatx80_precision_x
:
4862 case floatx80_precision_d
:
4863 roundIncrement
= UINT64_C(0x0000000000000400);
4864 roundMask
= UINT64_C(0x00000000000007FF);
4866 case floatx80_precision_s
:
4867 roundIncrement
= UINT64_C(0x0000008000000000);
4868 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4871 g_assert_not_reached();
4873 zSig0
|= ( zSig1
!= 0 );
4874 switch (roundingMode
) {
4875 case float_round_nearest_even
:
4876 case float_round_ties_away
:
4878 case float_round_to_zero
:
4881 case float_round_up
:
4882 roundIncrement
= zSign
? 0 : roundMask
;
4884 case float_round_down
:
4885 roundIncrement
= zSign
? roundMask
: 0;
4890 roundBits
= zSig0
& roundMask
;
4891 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4892 if ( ( 0x7FFE < zExp
)
4893 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4898 if (status
->flush_to_zero
) {
4899 float_raise(float_flag_output_denormal
, status
);
4900 return packFloatx80(zSign
, 0, 0);
4902 isTiny
= status
->tininess_before_rounding
4904 || (zSig0
<= zSig0
+ roundIncrement
);
4905 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4907 roundBits
= zSig0
& roundMask
;
4908 if (isTiny
&& roundBits
) {
4909 float_raise(float_flag_underflow
, status
);
4912 float_raise(float_flag_inexact
, status
);
4914 zSig0
+= roundIncrement
;
4915 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4916 roundIncrement
= roundMask
+ 1;
4917 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4918 roundMask
|= roundIncrement
;
4920 zSig0
&= ~ roundMask
;
4921 return packFloatx80( zSign
, zExp
, zSig0
);
4925 float_raise(float_flag_inexact
, status
);
4927 zSig0
+= roundIncrement
;
4928 if ( zSig0
< roundIncrement
) {
4930 zSig0
= UINT64_C(0x8000000000000000);
4932 roundIncrement
= roundMask
+ 1;
4933 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4934 roundMask
|= roundIncrement
;
4936 zSig0
&= ~ roundMask
;
4937 if ( zSig0
== 0 ) zExp
= 0;
4938 return packFloatx80( zSign
, zExp
, zSig0
);
4940 switch (roundingMode
) {
4941 case float_round_nearest_even
:
4942 case float_round_ties_away
:
4943 increment
= ((int64_t)zSig1
< 0);
4945 case float_round_to_zero
:
4948 case float_round_up
:
4949 increment
= !zSign
&& zSig1
;
4951 case float_round_down
:
4952 increment
= zSign
&& zSig1
;
4957 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4958 if ( ( 0x7FFE < zExp
)
4959 || ( ( zExp
== 0x7FFE )
4960 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4966 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4967 if ( ( roundingMode
== float_round_to_zero
)
4968 || ( zSign
&& ( roundingMode
== float_round_up
) )
4969 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4971 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4973 return packFloatx80(zSign
,
4974 floatx80_infinity_high
,
4975 floatx80_infinity_low
);
4978 isTiny
= status
->tininess_before_rounding
4981 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4982 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4984 if (isTiny
&& zSig1
) {
4985 float_raise(float_flag_underflow
, status
);
4988 float_raise(float_flag_inexact
, status
);
4990 switch (roundingMode
) {
4991 case float_round_nearest_even
:
4992 case float_round_ties_away
:
4993 increment
= ((int64_t)zSig1
< 0);
4995 case float_round_to_zero
:
4998 case float_round_up
:
4999 increment
= !zSign
&& zSig1
;
5001 case float_round_down
:
5002 increment
= zSign
&& zSig1
;
5009 if (!(zSig1
<< 1) && roundNearestEven
) {
5012 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
5014 return packFloatx80( zSign
, zExp
, zSig0
);
5018 float_raise(float_flag_inexact
, status
);
5024 zSig0
= UINT64_C(0x8000000000000000);
5027 if (!(zSig1
<< 1) && roundNearestEven
) {
5033 if ( zSig0
== 0 ) zExp
= 0;
5035 return packFloatx80( zSign
, zExp
, zSig0
);
5039 /*----------------------------------------------------------------------------
5040 | Takes an abstract floating-point value having sign `zSign', exponent
5041 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5042 | and returns the proper extended double-precision floating-point value
5043 | corresponding to the abstract input. This routine is just like
5044 | `roundAndPackFloatx80' except that the input significand does not have to be
5046 *----------------------------------------------------------------------------*/
5048 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
5049 bool zSign
, int32_t zExp
,
5050 uint64_t zSig0
, uint64_t zSig1
,
5051 float_status
*status
)
5060 shiftCount
= clz64(zSig0
);
5061 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
5063 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
5064 zSig0
, zSig1
, status
);
5068 /*----------------------------------------------------------------------------
5069 | Returns the binary exponential of the single-precision floating-point value
5070 | `a'. The operation is performed according to the IEC/IEEE Standard for
5071 | Binary Floating-Point Arithmetic.
5073 | Uses the following identities:
5075 | 1. -------------------------------------------------------------------------
5079 | 2. -------------------------------------------------------------------------
5082 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5084 *----------------------------------------------------------------------------*/
/*
 * Taylor-series coefficients for e**x about 0: entry i (0-based) is the
 * double-precision encoding of 1/(i+1)! — e.g. entry 1 is 0.5 (1/2!),
 * entry 2 is 0x3fc5555555555555 ~= 1/6 (1/3!).  The trailing comment on
 * each line gives the factorial's argument n.  Fifteen terms suffice for
 * single-precision accuracy after argument reduction in float32_exp2().
 */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /* 1 */
    const_float64( 0x3fe0000000000000ll ), /* 2 */
    const_float64( 0x3fc5555555555555ll ), /* 3 */
    const_float64( 0x3fa5555555555555ll ), /* 4 */
    const_float64( 0x3f81111111111111ll ), /* 5 */
    const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
    const_float64( 0x3f2a01a01a01a01all ), /* 7 */
    const_float64( 0x3efa01a01a01a01all ), /* 8 */
    const_float64( 0x3ec71de3a556c734ll ), /* 9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5105 float32
float32_exp2(float32 a
, float_status
*status
)
5107 FloatParts64 xp
, xnp
, tp
, rp
;
5110 float32_unpack_canonical(&xp
, a
, status
);
5111 if (unlikely(xp
.cls
!= float_class_normal
)) {
5113 case float_class_snan
:
5114 case float_class_qnan
:
5115 parts_return_nan(&xp
, status
);
5116 return float32_round_pack_canonical(&xp
, status
);
5117 case float_class_inf
:
5118 return xp
.sign
? float32_zero
: a
;
5119 case float_class_zero
:
5124 g_assert_not_reached();
5127 float_raise(float_flag_inexact
, status
);
5129 float64_unpack_canonical(&tp
, float64_ln2
, status
);
5130 xp
= *parts_mul(&xp
, &tp
, status
);
5133 float64_unpack_canonical(&rp
, float64_one
, status
);
5134 for (i
= 0 ; i
< 15 ; i
++) {
5135 float64_unpack_canonical(&tp
, float32_exp2_coefficients
[i
], status
);
5136 rp
= *parts_muladd(&tp
, &xp
, &rp
, 0, status
);
5137 xnp
= *parts_mul(&xnp
, &xp
, status
);
5140 return float32_round_pack_canonical(&rp
, status
);
5143 /*----------------------------------------------------------------------------
5144 | Rounds the extended double-precision floating-point value `a'
5145 | to the precision provided by floatx80_rounding_precision and returns the
5146 | result as an extended double-precision floating-point value.
5147 | The operation is performed according to the IEC/IEEE Standard for Binary
5148 | Floating-Point Arithmetic.
5149 *----------------------------------------------------------------------------*/
5151 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5155 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
5156 return floatx80_default_nan(status
);
5158 return floatx80_round_pack_canonical(&p
, status
);
5161 static void __attribute__((constructor
)) softfloat_init(void)
5163 union_float64 ua
, ub
, uc
, ur
;
5165 if (QEMU_NO_HARDFLOAT
) {
5169 * Test that the host's FMA is not obviously broken. For example,
5170 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5171 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5173 ua
.s
= 0x0020000000000001ULL
;
5174 ub
.s
= 0x3ca0000000000000ULL
;
5175 uc
.s
= 0x0020000000000000ULL
;
5176 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
5177 if (ur
.s
!= 0x0020000000000001ULL
) {
5178 force_soft_fma
= true;