4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is neither fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
215 # define QEMU_HARDFLOAT_USE_ISINF 0
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being already set.
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
405 * Classify a floating point number. Everything above float_class_qnan
406 * is a NaN so cls >= float_class_qnan is any NaN.
409 typedef enum __attribute__ ((__packed__
)) {
410 float_class_unclassified
,
414 float_class_qnan
, /* all NaNs from here */
418 #define float_cmask(bit) (1u << (bit))
421 float_cmask_zero
= float_cmask(float_class_zero
),
422 float_cmask_normal
= float_cmask(float_class_normal
),
423 float_cmask_inf
= float_cmask(float_class_inf
),
424 float_cmask_qnan
= float_cmask(float_class_qnan
),
425 float_cmask_snan
= float_cmask(float_class_snan
),
427 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
428 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
431 /* Flags for parts_minmax. */
433 /* Set for minimum; clear for maximum. */
435 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
437 /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
440 * Set for the IEEE 754-2019 minimumNumber() and maximumNumber() operations.
446 /* Simple helpers for checking if, or what kind of, NaN we have */
447 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
449 return unlikely(c
>= float_class_qnan
);
452 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
454 return c
== float_class_snan
;
457 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
459 return c
== float_class_qnan
;
463 * Structure holding all of the decomposed parts of a float.
464 * The exponent is unbiased and the fraction is normalized.
466 * The fraction words are stored in big-endian word ordering,
467 * so that truncation from a larger format to a smaller format
468 * can be done simply by ignoring subsequent elements.
476 /* Routines that know the structure may reference the singular name. */
479 * Routines expanded with multiple structures reference "hi" and "lo"
480 * depending on the operation. In FloatParts64, "hi" and "lo" are
481 * both the same word and aliased here.
501 uint64_t frac_hm
; /* high-middle */
502 uint64_t frac_lm
; /* low-middle */
506 /* These apply to the most significant word of each FloatPartsN. */
507 #define DECOMPOSED_BINARY_POINT 63
508 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
510 /* Structure holding all of the relevant parameters for a format.
511 * exp_size: the size of the exponent field
512 * exp_bias: the offset applied to the exponent field
513 * exp_max: the maximum normalised exponent
514 * frac_size: the size of the fraction field
515 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516 * The following are computed based on the size of the fraction
517 * round_mask: bits below lsb which must be rounded
518 * The following optional modifiers are available:
519 * arm_althp: handle ARM Alternative Half Precision
531 /* Expand fields based on the size of exponent and fraction */
532 #define FLOAT_PARAMS_(E) \
534 .exp_bias = ((1 << E) - 1) >> 1, \
535 .exp_max = (1 << E) - 1
537 #define FLOAT_PARAMS(E, F) \
540 .frac_shift = (-F - 1) & 63, \
541 .round_mask = (1ull << ((-F - 1) & 63)) - 1
543 static const FloatFmt float16_params
= {
547 static const FloatFmt float16_params_ahp
= {
552 static const FloatFmt bfloat16_params
= {
556 static const FloatFmt float32_params
= {
560 static const FloatFmt float64_params
= {
564 static const FloatFmt float128_params
= {
565 FLOAT_PARAMS(15, 112)
568 #define FLOATX80_PARAMS(R) \
570 .frac_size = R == 64 ? 63 : R, \
572 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
574 static const FloatFmt floatx80_params
[3] = {
575 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
576 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
577 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
580 /* Unpack a float to parts, but do not canonicalize. */
581 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
583 const int f_size
= fmt
->frac_size
;
584 const int e_size
= fmt
->exp_size
;
586 *r
= (FloatParts64
) {
587 .cls
= float_class_unclassified
,
588 .sign
= extract64(raw
, f_size
+ e_size
, 1),
589 .exp
= extract64(raw
, f_size
, e_size
),
590 .frac
= extract64(raw
, 0, f_size
)
594 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
596 unpack_raw64(p
, &float16_params
, f
);
599 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
601 unpack_raw64(p
, &bfloat16_params
, f
);
604 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
606 unpack_raw64(p
, &float32_params
, f
);
609 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
611 unpack_raw64(p
, &float64_params
, f
);
614 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
616 *p
= (FloatParts128
) {
617 .cls
= float_class_unclassified
,
618 .sign
= extract32(f
.high
, 15, 1),
619 .exp
= extract32(f
.high
, 0, 15),
624 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
626 const int f_size
= float128_params
.frac_size
- 64;
627 const int e_size
= float128_params
.exp_size
;
629 *p
= (FloatParts128
) {
630 .cls
= float_class_unclassified
,
631 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
632 .exp
= extract64(f
.high
, f_size
, e_size
),
633 .frac_hi
= extract64(f
.high
, 0, f_size
),
638 /* Pack a float from parts, but do not canonicalize. */
639 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
641 const int f_size
= fmt
->frac_size
;
642 const int e_size
= fmt
->exp_size
;
645 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
646 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
647 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
651 static inline float16
float16_pack_raw(const FloatParts64
*p
)
653 return make_float16(pack_raw64(p
, &float16_params
));
656 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
658 return pack_raw64(p
, &bfloat16_params
);
661 static inline float32
float32_pack_raw(const FloatParts64
*p
)
663 return make_float32(pack_raw64(p
, &float32_params
));
666 static inline float64
float64_pack_raw(const FloatParts64
*p
)
668 return make_float64(pack_raw64(p
, &float64_params
));
671 static float128
float128_pack_raw(const FloatParts128
*p
)
673 const int f_size
= float128_params
.frac_size
- 64;
674 const int e_size
= float128_params
.exp_size
;
677 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
678 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
679 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
680 return make_float128(hi
, p
->frac_lo
);
683 /*----------------------------------------------------------------------------
684 | Functions and definitions to determine: (1) whether tininess for underflow
685 | is detected before or after rounding by default, (2) what (if anything)
686 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
687 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
688 | are propagated from function inputs to output. These details are target-specific.
690 *----------------------------------------------------------------------------*/
691 #include "softfloat-specialize.c.inc"
693 #define PARTS_GENERIC_64_128(NAME, P) \
694 _Generic((P), FloatParts64 *: parts64_##NAME, \
695 FloatParts128 *: parts128_##NAME)
697 #define PARTS_GENERIC_64_128_256(NAME, P) \
698 _Generic((P), FloatParts64 *: parts64_##NAME, \
699 FloatParts128 *: parts128_##NAME, \
700 FloatParts256 *: parts256_##NAME)
702 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
703 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
705 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
706 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
708 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
710 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
712 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
715 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
717 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
718 FloatParts64
*c
, float_status
*s
,
719 int ab_mask
, int abc_mask
);
720 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
724 int ab_mask
, int abc_mask
);
726 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
727 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
729 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
730 const FloatFmt
*fmt
);
731 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
732 const FloatFmt
*fmt
);
734 #define parts_canonicalize(A, S, F) \
735 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
737 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
738 const FloatFmt
*fmt
);
739 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
740 const FloatFmt
*fmt
);
742 #define parts_uncanon_normal(A, S, F) \
743 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
745 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
746 const FloatFmt
*fmt
);
747 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
748 const FloatFmt
*fmt
);
750 #define parts_uncanon(A, S, F) \
751 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
753 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
754 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
755 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
757 #define parts_add_normal(A, B) \
758 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
760 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
761 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
762 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
764 #define parts_sub_normal(A, B) \
765 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
767 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
768 float_status
*s
, bool subtract
);
769 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
770 float_status
*s
, bool subtract
);
772 #define parts_addsub(A, B, S, Z) \
773 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
775 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
777 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
780 #define parts_mul(A, B, S) \
781 PARTS_GENERIC_64_128(mul, A)(A, B, S)
783 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
784 FloatParts64
*c
, int flags
,
786 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
787 FloatParts128
*c
, int flags
,
790 #define parts_muladd(A, B, C, Z, S) \
791 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
793 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
795 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
798 #define parts_div(A, B, S) \
799 PARTS_GENERIC_64_128(div, A)(A, B, S)
801 static FloatParts64
*parts64_modrem(FloatParts64
*a
, FloatParts64
*b
,
802 uint64_t *mod_quot
, float_status
*s
);
803 static FloatParts128
*parts128_modrem(FloatParts128
*a
, FloatParts128
*b
,
804 uint64_t *mod_quot
, float_status
*s
);
806 #define parts_modrem(A, B, Q, S) \
807 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
809 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
810 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
812 #define parts_sqrt(A, S, F) \
813 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
815 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
816 int scale
, int frac_size
);
817 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
818 int scale
, int frac_size
);
820 #define parts_round_to_int_normal(A, R, C, F) \
821 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
823 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
824 int scale
, float_status
*s
,
825 const FloatFmt
*fmt
);
826 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
827 int scale
, float_status
*s
,
828 const FloatFmt
*fmt
);
830 #define parts_round_to_int(A, R, C, S, F) \
831 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
833 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
834 int scale
, int64_t min
, int64_t max
,
836 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
837 int scale
, int64_t min
, int64_t max
,
840 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
841 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
843 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
844 int scale
, uint64_t max
,
846 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
847 int scale
, uint64_t max
,
850 #define parts_float_to_uint(P, R, Z, M, S) \
851 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
853 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
854 int scale
, float_status
*s
);
855 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
856 int scale
, float_status
*s
);
858 #define parts_sint_to_float(P, I, Z, S) \
859 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
861 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
862 int scale
, float_status
*s
);
863 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
864 int scale
, float_status
*s
);
866 #define parts_uint_to_float(P, I, Z, S) \
867 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
869 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
870 float_status
*s
, int flags
);
871 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
872 float_status
*s
, int flags
);
874 #define parts_minmax(A, B, S, F) \
875 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
877 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
878 float_status
*s
, bool q
);
879 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
880 float_status
*s
, bool q
);
882 #define parts_compare(A, B, S, Q) \
883 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
885 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
886 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
888 #define parts_scalbn(A, N, S) \
889 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
891 static void parts64_log2(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
892 static void parts128_log2(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
894 #define parts_log2(A, S, F) \
895 PARTS_GENERIC_64_128(log2, A)(A, S, F)
898 * Helper functions for softfloat-parts.c.inc, per-size operations.
901 #define FRAC_GENERIC_64_128(NAME, P) \
902 _Generic((P), FloatParts64 *: frac64_##NAME, \
903 FloatParts128 *: frac128_##NAME)
905 #define FRAC_GENERIC_64_128_256(NAME, P) \
906 _Generic((P), FloatParts64 *: frac64_##NAME, \
907 FloatParts128 *: frac128_##NAME, \
908 FloatParts256 *: frac256_##NAME)
910 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
912 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
915 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
918 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
919 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
923 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
926 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
927 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
928 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
929 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
933 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
935 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
937 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
940 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
942 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
943 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
946 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
948 static void frac64_allones(FloatParts64
*a
)
953 static void frac128_allones(FloatParts128
*a
)
955 a
->frac_hi
= a
->frac_lo
= -1;
958 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
960 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
962 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
965 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
967 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
969 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
974 return ta
< tb
? -1 : 1;
977 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
979 static void frac64_clear(FloatParts64
*a
)
984 static void frac128_clear(FloatParts128
*a
)
986 a
->frac_hi
= a
->frac_lo
= 0;
989 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
991 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
993 uint64_t n1
, n0
, r
, q
;
997 * We want a 2*N / N-bit division to produce exactly an N-bit
998 * result, so that we do not lose any precision and so that we
999 * do not have to renormalize afterward. If A.frac < B.frac,
1000 * then division would produce an (N-1)-bit result; shift A left
1001 * by one to produce an N-bit result, and return true to
1002 * decrement the exponent to match.
1004 * The udiv_qrnnd algorithm that we're using requires normalization,
1005 * i.e. the msb of the denominator must be set, which is already true.
1007 ret
= a
->frac
< b
->frac
;
1015 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1017 /* Set lsb if there is a remainder, to set inexact. */
1018 a
->frac
= q
| (r
!= 0);
1023 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1025 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1026 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1029 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1030 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1032 ret
= lt128(a0
, a1
, b0
, b1
);
1034 a1
= shr_double(a0
, a1
, 1);
1038 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1039 q0
= estimateDiv128To64(a0
, a1
, b0
);
1042 * Estimate is high because B1 was not included (unless B1 == 0).
1043 * Reduce quotient and increase remainder until remainder is non-negative.
1044 * This loop will execute 0 to 2 times.
1046 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1047 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1050 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1053 /* Repeat using the remainder, producing a second word of quotient. */
1054 q1
= estimateDiv128To64(r1
, r2
, b0
);
1055 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1056 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1059 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1062 /* Any remainder indicates inexact; set sticky bit. */
1063 q1
|= (r2
| r3
) != 0;
1070 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1072 static bool frac64_eqz(FloatParts64
*a
)
1074 return a
->frac
== 0;
1077 static bool frac128_eqz(FloatParts128
*a
)
1079 return (a
->frac_hi
| a
->frac_lo
) == 0;
1082 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1084 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1086 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1089 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1091 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1092 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1095 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1097 static void frac64_neg(FloatParts64
*a
)
1102 static void frac128_neg(FloatParts128
*a
)
1105 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1106 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1109 static void frac256_neg(FloatParts256
*a
)
1112 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1113 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1114 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1115 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1118 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1120 static int frac64_normalize(FloatParts64
*a
)
1123 int shift
= clz64(a
->frac
);
1130 static int frac128_normalize(FloatParts128
*a
)
1133 int shl
= clz64(a
->frac_hi
);
1134 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1137 } else if (a
->frac_lo
) {
1138 int shl
= clz64(a
->frac_lo
);
1139 a
->frac_hi
= a
->frac_lo
<< shl
;
1146 static int frac256_normalize(FloatParts256
*a
)
1148 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1149 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1161 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1164 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1167 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1170 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1180 a0
= shl_double(a0
, a1
, shl
);
1181 a1
= shl_double(a1
, a2
, shl
);
1182 a2
= shl_double(a2
, a3
, shl
);
1193 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1195 static void frac64_modrem(FloatParts64
*a
, FloatParts64
*b
, uint64_t *mod_quot
)
1197 uint64_t a0
, a1
, b0
, t0
, t1
, q
, quot
;
1198 int exp_diff
= a
->exp
- b
->exp
;
1204 if (exp_diff
< -1) {
1210 if (exp_diff
== -1) {
1216 quot
= q
= b0
<= a0
;
1222 while (exp_diff
> 0) {
1223 q
= estimateDiv128To64(a0
, a1
, b0
);
1224 q
= q
> 2 ? q
- 2 : 0;
1225 mul64To128(b0
, q
, &t0
, &t1
);
1226 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1227 shortShift128Left(a0
, a1
, 62, &a0
, &a1
);
1229 quot
= (quot
<< 62) + q
;
1234 q
= estimateDiv128To64(a0
, a1
, b0
);
1235 q
= q
> 2 ? (q
- 2) >> (64 - exp_diff
) : 0;
1236 mul64To128(b0
, q
<< (64 - exp_diff
), &t0
, &t1
);
1237 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1238 shortShift128Left(0, b0
, 64 - exp_diff
, &t0
, &t1
);
1239 while (le128(t0
, t1
, a0
, a1
)) {
1241 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1243 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1252 sub128(t0
, t1
, a0
, a1
, &t0
, &t1
);
1253 if (lt128(t0
, t1
, a0
, a1
) ||
1254 (eq128(t0
, t1
, a0
, a1
) && (q
& 1))) {
1263 shortShift128Left(a0
, a1
, shift
, &a0
, &a1
);
1264 } else if (likely(a1
)) {
1270 a
->cls
= float_class_zero
;
1274 a
->exp
= b
->exp
+ exp_diff
- shift
;
1275 a
->frac
= a0
| (a1
!= 0);
1278 static void frac128_modrem(FloatParts128
*a
, FloatParts128
*b
,
1281 uint64_t a0
, a1
, a2
, b0
, b1
, t0
, t1
, t2
, q
, quot
;
1282 int exp_diff
= a
->exp
- b
->exp
;
1289 if (exp_diff
< -1) {
1295 if (exp_diff
== -1) {
1296 shift128Right(a0
, a1
, 1, &a0
, &a1
);
1303 quot
= q
= le128(b0
, b1
, a0
, a1
);
1305 sub128(a0
, a1
, b0
, b1
, &a0
, &a1
);
1309 while (exp_diff
> 0) {
1310 q
= estimateDiv128To64(a0
, a1
, b0
);
1311 q
= q
> 4 ? q
- 4 : 0;
1312 mul128By64To192(b0
, b1
, q
, &t0
, &t1
, &t2
);
1313 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1314 shortShift192Left(a0
, a1
, a2
, 61, &a0
, &a1
, &a2
);
1316 quot
= (quot
<< 61) + q
;
1321 q
= estimateDiv128To64(a0
, a1
, b0
);
1322 q
= q
> 4 ? (q
- 4) >> (64 - exp_diff
) : 0;
1323 mul128By64To192(b0
, b1
, q
<< (64 - exp_diff
), &t0
, &t1
, &t2
);
1324 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1325 shortShift192Left(0, b0
, b1
, 64 - exp_diff
, &t0
, &t1
, &t2
);
1326 while (le192(t0
, t1
, t2
, a0
, a1
, a2
)) {
1328 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1330 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1340 sub192(t0
, t1
, t2
, a0
, a1
, a2
, &t0
, &t1
, &t2
);
1341 if (lt192(t0
, t1
, t2
, a0
, a1
, a2
) ||
1342 (eq192(t0
, t1
, t2
, a0
, a1
, a2
) && (q
& 1))) {
1352 shortShift192Left(a0
, a1
, a2
, shift
, &a0
, &a1
, &a2
);
1353 } else if (likely(a1
)) {
1355 shortShift128Left(a1
, a2
, shift
, &a0
, &a1
);
1358 } else if (likely(a2
)) {
1364 a
->cls
= float_class_zero
;
1368 a
->exp
= b
->exp
+ exp_diff
- shift
;
1370 a
->frac_lo
= a1
| (a2
!= 0);
1373 #define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1375 static void frac64_shl(FloatParts64
*a
, int c
)
1380 static void frac128_shl(FloatParts128
*a
, int c
)
1382 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1390 a0
= shl_double(a0
, a1
, c
);
1398 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1400 static void frac64_shr(FloatParts64
*a
, int c
)
1405 static void frac128_shr(FloatParts128
*a
, int c
)
1407 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1415 a1
= shr_double(a0
, a1
, c
);
1423 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1425 static void frac64_shrjam(FloatParts64
*a
, int c
)
1427 uint64_t a0
= a
->frac
;
1429 if (likely(c
!= 0)) {
1430 if (likely(c
< 64)) {
1431 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1439 static void frac128_shrjam(FloatParts128
*a
, int c
)
1441 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1442 uint64_t sticky
= 0;
1444 if (unlikely(c
== 0)) {
1446 } else if (likely(c
< 64)) {
1448 } else if (likely(c
< 128)) {
1462 sticky
|= shr_double(a1
, 0, c
);
1463 a1
= shr_double(a0
, a1
, c
);
1467 a
->frac_lo
= a1
| (sticky
!= 0);
1471 static void frac256_shrjam(FloatParts256
*a
, int c
)
1473 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1474 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1475 uint64_t sticky
= 0;
1477 if (unlikely(c
== 0)) {
1479 } else if (likely(c
< 64)) {
1481 } else if (likely(c
< 256)) {
1482 if (unlikely(c
& 128)) {
1484 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1486 if (unlikely(c
& 64)) {
1488 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1495 sticky
= a0
| a1
| a2
| a3
;
1496 a0
= a1
= a2
= a3
= 0;
1500 sticky
|= shr_double(a3
, 0, c
);
1501 a3
= shr_double(a2
, a3
, c
);
1502 a2
= shr_double(a1
, a2
, c
);
1503 a1
= shr_double(a0
, a1
, c
);
1507 a
->frac_lo
= a3
| (sticky
!= 0);
1513 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1515 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1517 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1520 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1523 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1524 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1528 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1531 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1532 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1533 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1534 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1538 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1540 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1542 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1545 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1547 r
->frac_hi
= a
->frac_hi
;
1548 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1551 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1553 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1555 r
->frac_hi
= a
->frac
;
1559 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1561 r
->frac_hi
= a
->frac_hi
;
1562 r
->frac_hm
= a
->frac_lo
;
1567 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1570 * Reciprocal sqrt table. 1 bit of exponent, 6 bits of mantissa.
1571 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1572 * and thus MIT licenced.
1574 static const uint16_t rsqrt_tab
[128] = {
1575 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1576 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1577 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1578 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1579 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1580 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1581 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1582 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1583 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1584 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1585 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1586 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1587 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1588 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1589 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1590 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1593 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1594 #define FloatPartsN glue(FloatParts,N)
1595 #define FloatPartsW glue(FloatParts,W)
1600 #include "softfloat-parts-addsub.c.inc"
1601 #include "softfloat-parts.c.inc"
1608 #include "softfloat-parts-addsub.c.inc"
1609 #include "softfloat-parts.c.inc"
1615 #include "softfloat-parts-addsub.c.inc"
1624 * Pack/unpack routines with a specific FloatFmt.
1627 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1628 float_status
*s
, const FloatFmt
*params
)
1630 float16_unpack_raw(p
, f
);
1631 parts_canonicalize(p
, s
, params
);
1634 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1637 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1640 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1643 bfloat16_unpack_raw(p
, f
);
1644 parts_canonicalize(p
, s
, &bfloat16_params
);
1647 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1649 const FloatFmt
*params
)
1651 parts_uncanon(p
, s
, params
);
1652 return float16_pack_raw(p
);
1655 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1658 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1661 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1664 parts_uncanon(p
, s
, &bfloat16_params
);
1665 return bfloat16_pack_raw(p
);
1668 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1671 float32_unpack_raw(p
, f
);
1672 parts_canonicalize(p
, s
, &float32_params
);
1675 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1678 parts_uncanon(p
, s
, &float32_params
);
1679 return float32_pack_raw(p
);
1682 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1685 float64_unpack_raw(p
, f
);
1686 parts_canonicalize(p
, s
, &float64_params
);
1689 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1692 parts_uncanon(p
, s
, &float64_params
);
1693 return float64_pack_raw(p
);
1696 static float64
float64r32_round_pack_canonical(FloatParts64
*p
,
1699 parts_uncanon(p
, s
, &float32_params
);
1702 * In parts_uncanon, we placed the fraction for float32 at the lsb.
1703 * We need to adjust the fraction higher so that the least N bits are
1704 * zero, and the fraction is adjacent to the float64 implicit bit.
1707 case float_class_normal
:
1708 if (unlikely(p
->exp
== 0)) {
1710 * The result is denormal for float32, but can be represented
1711 * in normalized form for float64. Adjust, per canonicalize.
1713 int shift
= frac_normalize(p
);
1714 p
->exp
= (float32_params
.frac_shift
-
1715 float32_params
.exp_bias
- shift
+ 1 +
1716 float64_params
.exp_bias
);
1717 frac_shr(p
, float64_params
.frac_shift
);
1719 frac_shl(p
, float32_params
.frac_shift
- float64_params
.frac_shift
);
1720 p
->exp
+= float64_params
.exp_bias
- float32_params
.exp_bias
;
1723 case float_class_snan
:
1724 case float_class_qnan
:
1725 frac_shl(p
, float32_params
.frac_shift
- float64_params
.frac_shift
);
1726 p
->exp
= float64_params
.exp_max
;
1728 case float_class_inf
:
1729 p
->exp
= float64_params
.exp_max
;
1731 case float_class_zero
:
1734 g_assert_not_reached();
1737 return float64_pack_raw(p
);
1740 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1743 float128_unpack_raw(p
, f
);
1744 parts_canonicalize(p
, s
, &float128_params
);
1747 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1750 parts_uncanon(p
, s
, &float128_params
);
1751 return float128_pack_raw(p
);
1754 /* Returns false if the encoding is invalid. */
1755 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1758 /* Ensure rounding precision is set before beginning. */
1759 switch (s
->floatx80_rounding_precision
) {
1760 case floatx80_precision_x
:
1761 case floatx80_precision_d
:
1762 case floatx80_precision_s
:
1765 g_assert_not_reached();
1768 if (unlikely(floatx80_invalid_encoding(f
))) {
1769 float_raise(float_flag_invalid
, s
);
1773 floatx80_unpack_raw(p
, f
);
1775 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1776 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1778 /* The explicit integer bit is ignored, after invalid checks. */
1779 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1780 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1781 : parts_is_snan_frac(p
->frac_hi
, s
)
1782 ? float_class_snan
: float_class_qnan
);
1787 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1790 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1795 case float_class_normal
:
1796 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1797 parts_uncanon_normal(p
, s
, fmt
);
1805 frac_truncjam(&p64
, p
);
1806 parts_uncanon_normal(&p64
, s
, fmt
);
1810 if (exp
!= fmt
->exp_max
) {
1813 /* rounded to inf -- fall through to set frac correctly */
1815 case float_class_inf
:
1816 /* x86 and m68k differ in the setting of the integer bit. */
1817 frac
= floatx80_infinity_low
;
1821 case float_class_zero
:
1826 case float_class_snan
:
1827 case float_class_qnan
:
1828 /* NaNs have the integer bit set. */
1829 frac
= p
->frac_hi
| (1ull << 63);
1834 g_assert_not_reached();
1837 return packFloatx80(p
->sign
, exp
, frac
);
1841 * Addition and subtraction
1844 static float16 QEMU_FLATTEN
1845 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1847 FloatParts64 pa
, pb
, *pr
;
1849 float16_unpack_canonical(&pa
, a
, status
);
1850 float16_unpack_canonical(&pb
, b
, status
);
1851 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1853 return float16_round_pack_canonical(pr
, status
);
1856 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1858 return float16_addsub(a
, b
, status
, false);
1861 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1863 return float16_addsub(a
, b
, status
, true);
1866 static float32 QEMU_SOFTFLOAT_ATTR
1867 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1869 FloatParts64 pa
, pb
, *pr
;
1871 float32_unpack_canonical(&pa
, a
, status
);
1872 float32_unpack_canonical(&pb
, b
, status
);
1873 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1875 return float32_round_pack_canonical(pr
, status
);
1878 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1880 return soft_f32_addsub(a
, b
, status
, false);
1883 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1885 return soft_f32_addsub(a
, b
, status
, true);
1888 static float64 QEMU_SOFTFLOAT_ATTR
1889 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1891 FloatParts64 pa
, pb
, *pr
;
1893 float64_unpack_canonical(&pa
, a
, status
);
1894 float64_unpack_canonical(&pb
, b
, status
);
1895 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1897 return float64_round_pack_canonical(pr
, status
);
1900 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1902 return soft_f64_addsub(a
, b
, status
, false);
1905 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1907 return soft_f64_addsub(a
, b
, status
, true);
1910 static float hard_f32_add(float a
, float b
)
1915 static float hard_f32_sub(float a
, float b
)
1920 static double hard_f64_add(double a
, double b
)
1925 static double hard_f64_sub(double a
, double b
)
1930 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1932 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1933 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1935 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1938 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1940 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1941 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1943 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1947 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1948 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1950 return float32_gen2(a
, b
, s
, hard
, soft
,
1951 f32_is_zon2
, f32_addsubmul_post
);
1954 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1955 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1957 return float64_gen2(a
, b
, s
, hard
, soft
,
1958 f64_is_zon2
, f64_addsubmul_post
);
1961 float32 QEMU_FLATTEN
1962 float32_add(float32 a
, float32 b
, float_status
*s
)
1964 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1967 float32 QEMU_FLATTEN
1968 float32_sub(float32 a
, float32 b
, float_status
*s
)
1970 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1973 float64 QEMU_FLATTEN
1974 float64_add(float64 a
, float64 b
, float_status
*s
)
1976 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1979 float64 QEMU_FLATTEN
1980 float64_sub(float64 a
, float64 b
, float_status
*s
)
1982 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1985 static float64
float64r32_addsub(float64 a
, float64 b
, float_status
*status
,
1988 FloatParts64 pa
, pb
, *pr
;
1990 float64_unpack_canonical(&pa
, a
, status
);
1991 float64_unpack_canonical(&pb
, b
, status
);
1992 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1994 return float64r32_round_pack_canonical(pr
, status
);
1997 float64
float64r32_add(float64 a
, float64 b
, float_status
*status
)
1999 return float64r32_addsub(a
, b
, status
, false);
2002 float64
float64r32_sub(float64 a
, float64 b
, float_status
*status
)
2004 return float64r32_addsub(a
, b
, status
, true);
2007 static bfloat16 QEMU_FLATTEN
2008 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
2010 FloatParts64 pa
, pb
, *pr
;
2012 bfloat16_unpack_canonical(&pa
, a
, status
);
2013 bfloat16_unpack_canonical(&pb
, b
, status
);
2014 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2016 return bfloat16_round_pack_canonical(pr
, status
);
2019 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
2021 return bfloat16_addsub(a
, b
, status
, false);
2024 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
2026 return bfloat16_addsub(a
, b
, status
, true);
2029 static float128 QEMU_FLATTEN
2030 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
2032 FloatParts128 pa
, pb
, *pr
;
2034 float128_unpack_canonical(&pa
, a
, status
);
2035 float128_unpack_canonical(&pb
, b
, status
);
2036 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2038 return float128_round_pack_canonical(pr
, status
);
2041 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
2043 return float128_addsub(a
, b
, status
, false);
2046 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
2048 return float128_addsub(a
, b
, status
, true);
2051 static floatx80 QEMU_FLATTEN
2052 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
2054 FloatParts128 pa
, pb
, *pr
;
2056 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2057 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2058 return floatx80_default_nan(status
);
2061 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
2062 return floatx80_round_pack_canonical(pr
, status
);
2065 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
2067 return floatx80_addsub(a
, b
, status
, false);
2070 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
2072 return floatx80_addsub(a
, b
, status
, true);
2079 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
2081 FloatParts64 pa
, pb
, *pr
;
2083 float16_unpack_canonical(&pa
, a
, status
);
2084 float16_unpack_canonical(&pb
, b
, status
);
2085 pr
= parts_mul(&pa
, &pb
, status
);
2087 return float16_round_pack_canonical(pr
, status
);
2090 static float32 QEMU_SOFTFLOAT_ATTR
2091 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
2093 FloatParts64 pa
, pb
, *pr
;
2095 float32_unpack_canonical(&pa
, a
, status
);
2096 float32_unpack_canonical(&pb
, b
, status
);
2097 pr
= parts_mul(&pa
, &pb
, status
);
2099 return float32_round_pack_canonical(pr
, status
);
2102 static float64 QEMU_SOFTFLOAT_ATTR
2103 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
2105 FloatParts64 pa
, pb
, *pr
;
2107 float64_unpack_canonical(&pa
, a
, status
);
2108 float64_unpack_canonical(&pb
, b
, status
);
2109 pr
= parts_mul(&pa
, &pb
, status
);
2111 return float64_round_pack_canonical(pr
, status
);
2114 static float hard_f32_mul(float a
, float b
)
2119 static double hard_f64_mul(double a
, double b
)
2124 float32 QEMU_FLATTEN
2125 float32_mul(float32 a
, float32 b
, float_status
*s
)
2127 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
2128 f32_is_zon2
, f32_addsubmul_post
);
2131 float64 QEMU_FLATTEN
2132 float64_mul(float64 a
, float64 b
, float_status
*s
)
2134 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
2135 f64_is_zon2
, f64_addsubmul_post
);
2138 float64
float64r32_mul(float64 a
, float64 b
, float_status
*status
)
2140 FloatParts64 pa
, pb
, *pr
;
2142 float64_unpack_canonical(&pa
, a
, status
);
2143 float64_unpack_canonical(&pb
, b
, status
);
2144 pr
= parts_mul(&pa
, &pb
, status
);
2146 return float64r32_round_pack_canonical(pr
, status
);
2149 bfloat16 QEMU_FLATTEN
2150 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
2152 FloatParts64 pa
, pb
, *pr
;
2154 bfloat16_unpack_canonical(&pa
, a
, status
);
2155 bfloat16_unpack_canonical(&pb
, b
, status
);
2156 pr
= parts_mul(&pa
, &pb
, status
);
2158 return bfloat16_round_pack_canonical(pr
, status
);
2161 float128 QEMU_FLATTEN
2162 float128_mul(float128 a
, float128 b
, float_status
*status
)
2164 FloatParts128 pa
, pb
, *pr
;
2166 float128_unpack_canonical(&pa
, a
, status
);
2167 float128_unpack_canonical(&pb
, b
, status
);
2168 pr
= parts_mul(&pa
, &pb
, status
);
2170 return float128_round_pack_canonical(pr
, status
);
2173 floatx80 QEMU_FLATTEN
2174 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
2176 FloatParts128 pa
, pb
, *pr
;
2178 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2179 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2180 return floatx80_default_nan(status
);
2183 pr
= parts_mul(&pa
, &pb
, status
);
2184 return floatx80_round_pack_canonical(pr
, status
);
2188 * Fused multiply-add
2191 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
2192 int flags
, float_status
*status
)
2194 FloatParts64 pa
, pb
, pc
, *pr
;
2196 float16_unpack_canonical(&pa
, a
, status
);
2197 float16_unpack_canonical(&pb
, b
, status
);
2198 float16_unpack_canonical(&pc
, c
, status
);
2199 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2201 return float16_round_pack_canonical(pr
, status
);
2204 static float32 QEMU_SOFTFLOAT_ATTR
2205 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
2206 float_status
*status
)
2208 FloatParts64 pa
, pb
, pc
, *pr
;
2210 float32_unpack_canonical(&pa
, a
, status
);
2211 float32_unpack_canonical(&pb
, b
, status
);
2212 float32_unpack_canonical(&pc
, c
, status
);
2213 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2215 return float32_round_pack_canonical(pr
, status
);
2218 static float64 QEMU_SOFTFLOAT_ATTR
2219 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
2220 float_status
*status
)
2222 FloatParts64 pa
, pb
, pc
, *pr
;
2224 float64_unpack_canonical(&pa
, a
, status
);
2225 float64_unpack_canonical(&pb
, b
, status
);
2226 float64_unpack_canonical(&pc
, c
, status
);
2227 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2229 return float64_round_pack_canonical(pr
, status
);
2232 static bool force_soft_fma
;
2234 float32 QEMU_FLATTEN
2235 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2237 union_float32 ua
, ub
, uc
, ur
;
2243 if (unlikely(!can_use_fpu(s
))) {
2246 if (unlikely(flags
& float_muladd_halve_result
)) {
2250 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2251 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2255 if (unlikely(force_soft_fma
)) {
2260 * When (a || b) == 0, there's no need to check for under/over flow,
2261 * since we know the addend is (normal || 0) and the product is 0.
2263 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2267 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2268 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2269 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2271 if (flags
& float_muladd_negate_c
) {
2276 union_float32 ua_orig
= ua
;
2277 union_float32 uc_orig
= uc
;
2279 if (flags
& float_muladd_negate_product
) {
2282 if (flags
& float_muladd_negate_c
) {
2286 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2288 if (unlikely(f32_is_inf(ur
))) {
2289 float_raise(float_flag_overflow
, s
);
2290 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2296 if (flags
& float_muladd_negate_result
) {
2297 return float32_chs(ur
.s
);
2302 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2305 float64 QEMU_FLATTEN
2306 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2308 union_float64 ua
, ub
, uc
, ur
;
2314 if (unlikely(!can_use_fpu(s
))) {
2317 if (unlikely(flags
& float_muladd_halve_result
)) {
2321 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2322 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2326 if (unlikely(force_soft_fma
)) {
2331 * When (a || b) == 0, there's no need to check for under/over flow,
2332 * since we know the addend is (normal || 0) and the product is 0.
2334 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2338 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2339 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2340 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2342 if (flags
& float_muladd_negate_c
) {
2347 union_float64 ua_orig
= ua
;
2348 union_float64 uc_orig
= uc
;
2350 if (flags
& float_muladd_negate_product
) {
2353 if (flags
& float_muladd_negate_c
) {
2357 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2359 if (unlikely(f64_is_inf(ur
))) {
2360 float_raise(float_flag_overflow
, s
);
2361 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2367 if (flags
& float_muladd_negate_result
) {
2368 return float64_chs(ur
.s
);
2373 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2376 float64
float64r32_muladd(float64 a
, float64 b
, float64 c
,
2377 int flags
, float_status
*status
)
2379 FloatParts64 pa
, pb
, pc
, *pr
;
2381 float64_unpack_canonical(&pa
, a
, status
);
2382 float64_unpack_canonical(&pb
, b
, status
);
2383 float64_unpack_canonical(&pc
, c
, status
);
2384 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2386 return float64r32_round_pack_canonical(pr
, status
);
2389 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2390 int flags
, float_status
*status
)
2392 FloatParts64 pa
, pb
, pc
, *pr
;
2394 bfloat16_unpack_canonical(&pa
, a
, status
);
2395 bfloat16_unpack_canonical(&pb
, b
, status
);
2396 bfloat16_unpack_canonical(&pc
, c
, status
);
2397 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2399 return bfloat16_round_pack_canonical(pr
, status
);
2402 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2403 int flags
, float_status
*status
)
2405 FloatParts128 pa
, pb
, pc
, *pr
;
2407 float128_unpack_canonical(&pa
, a
, status
);
2408 float128_unpack_canonical(&pb
, b
, status
);
2409 float128_unpack_canonical(&pc
, c
, status
);
2410 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2412 return float128_round_pack_canonical(pr
, status
);
2419 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2421 FloatParts64 pa
, pb
, *pr
;
2423 float16_unpack_canonical(&pa
, a
, status
);
2424 float16_unpack_canonical(&pb
, b
, status
);
2425 pr
= parts_div(&pa
, &pb
, status
);
2427 return float16_round_pack_canonical(pr
, status
);
2430 static float32 QEMU_SOFTFLOAT_ATTR
2431 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2433 FloatParts64 pa
, pb
, *pr
;
2435 float32_unpack_canonical(&pa
, a
, status
);
2436 float32_unpack_canonical(&pb
, b
, status
);
2437 pr
= parts_div(&pa
, &pb
, status
);
2439 return float32_round_pack_canonical(pr
, status
);
2442 static float64 QEMU_SOFTFLOAT_ATTR
2443 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2445 FloatParts64 pa
, pb
, *pr
;
2447 float64_unpack_canonical(&pa
, a
, status
);
2448 float64_unpack_canonical(&pb
, b
, status
);
2449 pr
= parts_div(&pa
, &pb
, status
);
2451 return float64_round_pack_canonical(pr
, status
);
2454 static float hard_f32_div(float a
, float b
)
2459 static double hard_f64_div(double a
, double b
)
2464 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2466 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2467 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2468 fpclassify(b
.h
) == FP_NORMAL
;
2470 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2473 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2475 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2476 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2477 fpclassify(b
.h
) == FP_NORMAL
;
2479 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2482 static bool f32_div_post(union_float32 a
, union_float32 b
)
2484 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2485 return fpclassify(a
.h
) != FP_ZERO
;
2487 return !float32_is_zero(a
.s
);
2490 static bool f64_div_post(union_float64 a
, union_float64 b
)
2492 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2493 return fpclassify(a
.h
) != FP_ZERO
;
2495 return !float64_is_zero(a
.s
);
2498 float32 QEMU_FLATTEN
2499 float32_div(float32 a
, float32 b
, float_status
*s
)
2501 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2502 f32_div_pre
, f32_div_post
);
2505 float64 QEMU_FLATTEN
2506 float64_div(float64 a
, float64 b
, float_status
*s
)
2508 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2509 f64_div_pre
, f64_div_post
);
2512 float64
float64r32_div(float64 a
, float64 b
, float_status
*status
)
2514 FloatParts64 pa
, pb
, *pr
;
2516 float64_unpack_canonical(&pa
, a
, status
);
2517 float64_unpack_canonical(&pb
, b
, status
);
2518 pr
= parts_div(&pa
, &pb
, status
);
2520 return float64r32_round_pack_canonical(pr
, status
);
2523 bfloat16 QEMU_FLATTEN
2524 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2526 FloatParts64 pa
, pb
, *pr
;
2528 bfloat16_unpack_canonical(&pa
, a
, status
);
2529 bfloat16_unpack_canonical(&pb
, b
, status
);
2530 pr
= parts_div(&pa
, &pb
, status
);
2532 return bfloat16_round_pack_canonical(pr
, status
);
2535 float128 QEMU_FLATTEN
2536 float128_div(float128 a
, float128 b
, float_status
*status
)
2538 FloatParts128 pa
, pb
, *pr
;
2540 float128_unpack_canonical(&pa
, a
, status
);
2541 float128_unpack_canonical(&pb
, b
, status
);
2542 pr
= parts_div(&pa
, &pb
, status
);
2544 return float128_round_pack_canonical(pr
, status
);
2547 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
2549 FloatParts128 pa
, pb
, *pr
;
2551 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2552 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2553 return floatx80_default_nan(status
);
2556 pr
= parts_div(&pa
, &pb
, status
);
2557 return floatx80_round_pack_canonical(pr
, status
);
2564 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
2566 FloatParts64 pa
, pb
, *pr
;
2568 float32_unpack_canonical(&pa
, a
, status
);
2569 float32_unpack_canonical(&pb
, b
, status
);
2570 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2572 return float32_round_pack_canonical(pr
, status
);
2575 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
2577 FloatParts64 pa
, pb
, *pr
;
2579 float64_unpack_canonical(&pa
, a
, status
);
2580 float64_unpack_canonical(&pb
, b
, status
);
2581 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2583 return float64_round_pack_canonical(pr
, status
);
2586 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
2588 FloatParts128 pa
, pb
, *pr
;
2590 float128_unpack_canonical(&pa
, a
, status
);
2591 float128_unpack_canonical(&pb
, b
, status
);
2592 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2594 return float128_round_pack_canonical(pr
, status
);
2598 * Returns the remainder of the extended double-precision floating-point value
2599 * `a' with respect to the corresponding value `b'.
2600 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2601 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2602 * the remainder based on truncating the quotient toward zero instead and
2603 * *quotient is set to the low 64 bits of the absolute value of the integer
2606 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
,
2607 uint64_t *quotient
, float_status
*status
)
2609 FloatParts128 pa
, pb
, *pr
;
2612 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2613 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2614 return floatx80_default_nan(status
);
2616 pr
= parts_modrem(&pa
, &pb
, mod
? quotient
: NULL
, status
);
2618 return floatx80_round_pack_canonical(pr
, status
);
2621 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
2624 return floatx80_modrem(a
, b
, false, "ient
, status
);
2627 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
2630 return floatx80_modrem(a
, b
, true, "ient
, status
);
/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */
2644 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2647 case float_class_snan
:
2648 float_raise(float_flag_invalid_snan
, s
);
2650 case float_class_qnan
:
2652 * There is no NaN in the destination format. Raise Invalid
2653 * and return a zero with the sign of the input NaN.
2655 float_raise(float_flag_invalid
, s
);
2656 a
->cls
= float_class_zero
;
2659 case float_class_inf
:
2661 * There is no Inf in the destination format. Raise Invalid
2662 * and return the maximum normal with the correct sign.
2664 float_raise(float_flag_invalid
, s
);
2665 a
->cls
= float_class_normal
;
2666 a
->exp
= float16_params_ahp
.exp_max
;
2667 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2668 float16_params_ahp
.frac_size
+ 1);
2671 case float_class_normal
:
2672 case float_class_zero
:
2676 g_assert_not_reached();
2680 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2682 if (is_nan(a
->cls
)) {
2683 parts_return_nan(a
, s
);
2687 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2689 if (is_nan(a
->cls
)) {
2690 parts_return_nan(a
, s
);
/* Select the float_to_float helper for P via PARTS_GENERIC_64_128. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2697 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2704 if (a
->cls
== float_class_normal
) {
2705 frac_truncjam(a
, b
);
2706 } else if (is_nan(a
->cls
)) {
2707 /* Discard the low bits of the NaN. */
2708 a
->frac
= b
->frac_hi
;
2709 parts_return_nan(a
, s
);
2713 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2721 if (is_nan(a
->cls
)) {
2722 parts_return_nan(a
, s
);
2726 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2728 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2731 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2732 parts_float_to_float(&p
, s
);
2733 return float32_round_pack_canonical(&p
, s
);
2736 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2738 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2741 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2742 parts_float_to_float(&p
, s
);
2743 return float64_round_pack_canonical(&p
, s
);
2746 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2749 const FloatFmt
*fmt
;
2751 float32_unpack_canonical(&p
, a
, s
);
2753 parts_float_to_float(&p
, s
);
2754 fmt
= &float16_params
;
2756 parts_float_to_ahp(&p
, s
);
2757 fmt
= &float16_params_ahp
;
2759 return float16a_round_pack_canonical(&p
, s
, fmt
);
2762 static float64 QEMU_SOFTFLOAT_ATTR
2763 soft_float32_to_float64(float32 a
, float_status
*s
)
2767 float32_unpack_canonical(&p
, a
, s
);
2768 parts_float_to_float(&p
, s
);
2769 return float64_round_pack_canonical(&p
, s
);
2772 float64
float32_to_float64(float32 a
, float_status
*s
)
2774 if (likely(float32_is_normal(a
))) {
2775 /* Widening conversion can never produce inexact results. */
2781 } else if (float32_is_zero(a
)) {
2782 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2784 return soft_float32_to_float64(a
, s
);
2788 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2791 const FloatFmt
*fmt
;
2793 float64_unpack_canonical(&p
, a
, s
);
2795 parts_float_to_float(&p
, s
);
2796 fmt
= &float16_params
;
2798 parts_float_to_ahp(&p
, s
);
2799 fmt
= &float16_params_ahp
;
2801 return float16a_round_pack_canonical(&p
, s
, fmt
);
2804 float32
float64_to_float32(float64 a
, float_status
*s
)
2808 float64_unpack_canonical(&p
, a
, s
);
2809 parts_float_to_float(&p
, s
);
2810 return float32_round_pack_canonical(&p
, s
);
2813 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2817 bfloat16_unpack_canonical(&p
, a
, s
);
2818 parts_float_to_float(&p
, s
);
2819 return float32_round_pack_canonical(&p
, s
);
2822 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2826 bfloat16_unpack_canonical(&p
, a
, s
);
2827 parts_float_to_float(&p
, s
);
2828 return float64_round_pack_canonical(&p
, s
);
2831 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2835 float32_unpack_canonical(&p
, a
, s
);
2836 parts_float_to_float(&p
, s
);
2837 return bfloat16_round_pack_canonical(&p
, s
);
2840 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2844 float64_unpack_canonical(&p
, a
, s
);
2845 parts_float_to_float(&p
, s
);
2846 return bfloat16_round_pack_canonical(&p
, s
);
2849 float32
float128_to_float32(float128 a
, float_status
*s
)
2854 float128_unpack_canonical(&p128
, a
, s
);
2855 parts_float_to_float_narrow(&p64
, &p128
, s
);
2856 return float32_round_pack_canonical(&p64
, s
);
2859 float64
float128_to_float64(float128 a
, float_status
*s
)
2864 float128_unpack_canonical(&p128
, a
, s
);
2865 parts_float_to_float_narrow(&p64
, &p128
, s
);
2866 return float64_round_pack_canonical(&p64
, s
);
2869 float128
float32_to_float128(float32 a
, float_status
*s
)
2874 float32_unpack_canonical(&p64
, a
, s
);
2875 parts_float_to_float_widen(&p128
, &p64
, s
);
2876 return float128_round_pack_canonical(&p128
, s
);
2879 float128
float64_to_float128(float64 a
, float_status
*s
)
2884 float64_unpack_canonical(&p64
, a
, s
);
2885 parts_float_to_float_widen(&p128
, &p64
, s
);
2886 return float128_round_pack_canonical(&p128
, s
);
2889 float32
floatx80_to_float32(floatx80 a
, float_status
*s
)
2894 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2895 parts_float_to_float_narrow(&p64
, &p128
, s
);
2897 parts_default_nan(&p64
, s
);
2899 return float32_round_pack_canonical(&p64
, s
);
2902 float64
floatx80_to_float64(floatx80 a
, float_status
*s
)
2907 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2908 parts_float_to_float_narrow(&p64
, &p128
, s
);
2910 parts_default_nan(&p64
, s
);
2912 return float64_round_pack_canonical(&p64
, s
);
2915 float128
floatx80_to_float128(floatx80 a
, float_status
*s
)
2919 if (floatx80_unpack_canonical(&p
, a
, s
)) {
2920 parts_float_to_float(&p
, s
);
2922 parts_default_nan(&p
, s
);
2924 return float128_round_pack_canonical(&p
, s
);
2927 floatx80
float32_to_floatx80(float32 a
, float_status
*s
)
2932 float32_unpack_canonical(&p64
, a
, s
);
2933 parts_float_to_float_widen(&p128
, &p64
, s
);
2934 return floatx80_round_pack_canonical(&p128
, s
);
2937 floatx80
float64_to_floatx80(float64 a
, float_status
*s
)
2942 float64_unpack_canonical(&p64
, a
, s
);
2943 parts_float_to_float_widen(&p128
, &p64
, s
);
2944 return floatx80_round_pack_canonical(&p128
, s
);
2947 floatx80
float128_to_floatx80(float128 a
, float_status
*s
)
2951 float128_unpack_canonical(&p
, a
, s
);
2952 parts_float_to_float(&p
, s
);
2953 return floatx80_round_pack_canonical(&p
, s
);
/*
 * Round to integral value
 */
2960 float16
float16_round_to_int(float16 a
, float_status
*s
)
2964 float16_unpack_canonical(&p
, a
, s
);
2965 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2966 return float16_round_pack_canonical(&p
, s
);
2969 float32
float32_round_to_int(float32 a
, float_status
*s
)
2973 float32_unpack_canonical(&p
, a
, s
);
2974 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2975 return float32_round_pack_canonical(&p
, s
);
2978 float64
float64_round_to_int(float64 a
, float_status
*s
)
2982 float64_unpack_canonical(&p
, a
, s
);
2983 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2984 return float64_round_pack_canonical(&p
, s
);
2987 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2991 bfloat16_unpack_canonical(&p
, a
, s
);
2992 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2993 return bfloat16_round_pack_canonical(&p
, s
);
2996 float128
float128_round_to_int(float128 a
, float_status
*s
)
3000 float128_unpack_canonical(&p
, a
, s
);
3001 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
3002 return float128_round_pack_canonical(&p
, s
);
3005 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
3009 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
3010 return floatx80_default_nan(status
);
3013 parts_round_to_int(&p
, status
->float_rounding_mode
, 0, status
,
3014 &floatx80_params
[status
->floatx80_rounding_precision
]);
3015 return floatx80_round_pack_canonical(&p
, status
);
/*
 * Floating-point to signed integer conversions
 */
3022 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3027 float16_unpack_canonical(&p
, a
, s
);
3028 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
3031 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3036 float16_unpack_canonical(&p
, a
, s
);
3037 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3040 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3045 float16_unpack_canonical(&p
, a
, s
);
3046 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3049 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3054 float16_unpack_canonical(&p
, a
, s
);
3055 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3058 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3063 float32_unpack_canonical(&p
, a
, s
);
3064 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3067 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3072 float32_unpack_canonical(&p
, a
, s
);
3073 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3076 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3081 float32_unpack_canonical(&p
, a
, s
);
3082 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3085 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3090 float64_unpack_canonical(&p
, a
, s
);
3091 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3094 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3099 float64_unpack_canonical(&p
, a
, s
);
3100 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3103 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3108 float64_unpack_canonical(&p
, a
, s
);
3109 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3112 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3117 bfloat16_unpack_canonical(&p
, a
, s
);
3118 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3121 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3126 bfloat16_unpack_canonical(&p
, a
, s
);
3127 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3130 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3135 bfloat16_unpack_canonical(&p
, a
, s
);
3136 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3139 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
3140 int scale
, float_status
*s
)
3144 float128_unpack_canonical(&p
, a
, s
);
3145 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3148 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
3149 int scale
, float_status
*s
)
3153 float128_unpack_canonical(&p
, a
, s
);
3154 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3157 static int32_t floatx80_to_int32_scalbn(floatx80 a
, FloatRoundMode rmode
,
3158 int scale
, float_status
*s
)
3162 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3163 parts_default_nan(&p
, s
);
3165 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3168 static int64_t floatx80_to_int64_scalbn(floatx80 a
, FloatRoundMode rmode
,
3169 int scale
, float_status
*s
)
3173 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3174 parts_default_nan(&p
, s
);
3176 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3179 int8_t float16_to_int8(float16 a
, float_status
*s
)
3181 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3184 int16_t float16_to_int16(float16 a
, float_status
*s
)
3186 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3189 int32_t float16_to_int32(float16 a
, float_status
*s
)
3191 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3194 int64_t float16_to_int64(float16 a
, float_status
*s
)
3196 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3199 int16_t float32_to_int16(float32 a
, float_status
*s
)
3201 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3204 int32_t float32_to_int32(float32 a
, float_status
*s
)
3206 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3209 int64_t float32_to_int64(float32 a
, float_status
*s
)
3211 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3214 int16_t float64_to_int16(float64 a
, float_status
*s
)
3216 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3219 int32_t float64_to_int32(float64 a
, float_status
*s
)
3221 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3224 int64_t float64_to_int64(float64 a
, float_status
*s
)
3226 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3229 int32_t float128_to_int32(float128 a
, float_status
*s
)
3231 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3234 int64_t float128_to_int64(float128 a
, float_status
*s
)
3236 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3239 int32_t floatx80_to_int32(floatx80 a
, float_status
*s
)
3241 return floatx80_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3244 int64_t floatx80_to_int64(floatx80 a
, float_status
*s
)
3246 return floatx80_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3249 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
3251 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3254 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
3256 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3259 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
3261 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3264 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
3266 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3269 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
3271 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3274 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
3276 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3279 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
3281 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3284 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
3286 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3289 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
3291 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3294 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
3296 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3299 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
3301 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3304 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*s
)
3306 return floatx80_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3309 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*s
)
3311 return floatx80_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3314 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
3316 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3319 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
3321 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3324 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
3326 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3329 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
3331 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3334 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
3336 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3339 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
3341 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
/*
 * Floating-point to unsigned integer conversions
 */
3348 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3353 float16_unpack_canonical(&p
, a
, s
);
3354 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
3357 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3362 float16_unpack_canonical(&p
, a
, s
);
3363 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3366 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3371 float16_unpack_canonical(&p
, a
, s
);
3372 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3375 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3380 float16_unpack_canonical(&p
, a
, s
);
3381 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3384 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3389 float32_unpack_canonical(&p
, a
, s
);
3390 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3393 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3398 float32_unpack_canonical(&p
, a
, s
);
3399 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3402 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3407 float32_unpack_canonical(&p
, a
, s
);
3408 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3411 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3416 float64_unpack_canonical(&p
, a
, s
);
3417 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3420 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3425 float64_unpack_canonical(&p
, a
, s
);
3426 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3429 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3434 float64_unpack_canonical(&p
, a
, s
);
3435 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3438 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3439 int scale
, float_status
*s
)
3443 bfloat16_unpack_canonical(&p
, a
, s
);
3444 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3447 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3448 int scale
, float_status
*s
)
3452 bfloat16_unpack_canonical(&p
, a
, s
);
3453 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3456 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3457 int scale
, float_status
*s
)
3461 bfloat16_unpack_canonical(&p
, a
, s
);
3462 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3465 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3466 int scale
, float_status
*s
)
3470 float128_unpack_canonical(&p
, a
, s
);
3471 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3474 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3475 int scale
, float_status
*s
)
3479 float128_unpack_canonical(&p
, a
, s
);
3480 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3483 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3485 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3488 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3490 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3493 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3495 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3498 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3500 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3503 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3505 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3508 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3510 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3513 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3515 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3518 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3520 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3523 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3525 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3528 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3530 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3533 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3535 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3538 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3540 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3543 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3545 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3548 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3550 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3553 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3555 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3558 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3560 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3563 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3565 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3568 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3570 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3573 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3575 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3578 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3580 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3583 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3585 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3588 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3590 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3593 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3595 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3598 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3600 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3603 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3605 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3608 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3610 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3613 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3615 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3618 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3620 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3623 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3625 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
/*
 * Signed integer to floating-point conversions
 */
3632 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3636 parts_sint_to_float(&p
, a
, scale
, status
);
3637 return float16_round_pack_canonical(&p
, status
);
3640 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3642 return int64_to_float16_scalbn(a
, scale
, status
);
3645 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3647 return int64_to_float16_scalbn(a
, scale
, status
);
3650 float16
int64_to_float16(int64_t a
, float_status
*status
)
3652 return int64_to_float16_scalbn(a
, 0, status
);
3655 float16
int32_to_float16(int32_t a
, float_status
*status
)
3657 return int64_to_float16_scalbn(a
, 0, status
);
3660 float16
int16_to_float16(int16_t a
, float_status
*status
)
3662 return int64_to_float16_scalbn(a
, 0, status
);
3665 float16
int8_to_float16(int8_t a
, float_status
*status
)
3667 return int64_to_float16_scalbn(a
, 0, status
);
3670 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3674 /* Without scaling, there are no overflow concerns. */
3675 if (likely(scale
== 0) && can_use_fpu(status
)) {
3681 parts64_sint_to_float(&p
, a
, scale
, status
);
3682 return float32_round_pack_canonical(&p
, status
);
3685 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3687 return int64_to_float32_scalbn(a
, scale
, status
);
3690 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3692 return int64_to_float32_scalbn(a
, scale
, status
);
3695 float32
int64_to_float32(int64_t a
, float_status
*status
)
3697 return int64_to_float32_scalbn(a
, 0, status
);
3700 float32
int32_to_float32(int32_t a
, float_status
*status
)
3702 return int64_to_float32_scalbn(a
, 0, status
);
3705 float32
int16_to_float32(int16_t a
, float_status
*status
)
3707 return int64_to_float32_scalbn(a
, 0, status
);
3710 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3714 /* Without scaling, there are no overflow concerns. */
3715 if (likely(scale
== 0) && can_use_fpu(status
)) {
3721 parts_sint_to_float(&p
, a
, scale
, status
);
3722 return float64_round_pack_canonical(&p
, status
);
3725 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3727 return int64_to_float64_scalbn(a
, scale
, status
);
3730 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3732 return int64_to_float64_scalbn(a
, scale
, status
);
3735 float64
int64_to_float64(int64_t a
, float_status
*status
)
3737 return int64_to_float64_scalbn(a
, 0, status
);
3740 float64
int32_to_float64(int32_t a
, float_status
*status
)
3742 return int64_to_float64_scalbn(a
, 0, status
);
3745 float64
int16_to_float64(int16_t a
, float_status
*status
)
3747 return int64_to_float64_scalbn(a
, 0, status
);
3750 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3754 parts_sint_to_float(&p
, a
, scale
, status
);
3755 return bfloat16_round_pack_canonical(&p
, status
);
3758 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3760 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3763 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3765 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3768 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3770 return int64_to_bfloat16_scalbn(a
, 0, status
);
3773 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3775 return int64_to_bfloat16_scalbn(a
, 0, status
);
3778 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3780 return int64_to_bfloat16_scalbn(a
, 0, status
);
3783 float128
int64_to_float128(int64_t a
, float_status
*status
)
3787 parts_sint_to_float(&p
, a
, 0, status
);
3788 return float128_round_pack_canonical(&p
, status
);
3791 float128
int32_to_float128(int32_t a
, float_status
*status
)
3793 return int64_to_float128(a
, status
);
3796 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
3800 parts_sint_to_float(&p
, a
, 0, status
);
3801 return floatx80_round_pack_canonical(&p
, status
);
3804 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
3806 return int64_to_floatx80(a
, status
);
/*
 * Unsigned integer to floating-point conversions
 */
3813 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3817 parts_uint_to_float(&p
, a
, scale
, status
);
3818 return float16_round_pack_canonical(&p
, status
);
3821 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3823 return uint64_to_float16_scalbn(a
, scale
, status
);
3826 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3828 return uint64_to_float16_scalbn(a
, scale
, status
);
3831 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3833 return uint64_to_float16_scalbn(a
, 0, status
);
3836 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3838 return uint64_to_float16_scalbn(a
, 0, status
);
3841 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3843 return uint64_to_float16_scalbn(a
, 0, status
);
3846 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3848 return uint64_to_float16_scalbn(a
, 0, status
);
/*
 * Convert the unsigned 64-bit integer `a' to float32; `scale' is forwarded
 * to parts_uint_to_float.  A scale of zero together with
 * can_use_fpu(status) selects a separate branch.
 * NOTE(review): that branch's body and the declaration of `p' are not
 * visible in this excerpt.
 */
3851 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3855 /* Without scaling, there are no overflow concerns. */
3856 if (likely(scale
== 0) && can_use_fpu(status
)) {
/* Generic path: convert into parts, then round and pack as float32. */
3862 parts_uint_to_float(&p
, a
, scale
, status
);
3863 return float32_round_pack_canonical(&p
, status
);
3866 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3868 return uint64_to_float32_scalbn(a
, scale
, status
);
3871 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3873 return uint64_to_float32_scalbn(a
, scale
, status
);
3876 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3878 return uint64_to_float32_scalbn(a
, 0, status
);
3881 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3883 return uint64_to_float32_scalbn(a
, 0, status
);
3886 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3888 return uint64_to_float32_scalbn(a
, 0, status
);
/*
 * Convert the unsigned 64-bit integer `a' to float64; `scale' is forwarded
 * to parts_uint_to_float.  A scale of zero together with
 * can_use_fpu(status) selects a separate branch.
 * NOTE(review): that branch's body and the declaration of `p' are not
 * visible in this excerpt.
 */
3891 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3895 /* Without scaling, there are no overflow concerns. */
3896 if (likely(scale
== 0) && can_use_fpu(status
)) {
/* Generic path: convert into parts, then round and pack as float64. */
3902 parts_uint_to_float(&p
, a
, scale
, status
);
3903 return float64_round_pack_canonical(&p
, status
);
3906 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3908 return uint64_to_float64_scalbn(a
, scale
, status
);
3911 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3913 return uint64_to_float64_scalbn(a
, scale
, status
);
3916 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3918 return uint64_to_float64_scalbn(a
, 0, status
);
3921 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3923 return uint64_to_float64_scalbn(a
, 0, status
);
3926 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3928 return uint64_to_float64_scalbn(a
, 0, status
);
3931 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3935 parts_uint_to_float(&p
, a
, scale
, status
);
3936 return bfloat16_round_pack_canonical(&p
, status
);
3939 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3941 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3944 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3946 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3949 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3951 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3954 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3956 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3959 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3961 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3964 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3968 parts_uint_to_float(&p
, a
, 0, status
);
3969 return float128_round_pack_canonical(&p
, status
);
/*
 * Minimum and maximum
 */
3976 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3978 FloatParts64 pa
, pb
, *pr
;
3980 float16_unpack_canonical(&pa
, a
, s
);
3981 float16_unpack_canonical(&pb
, b
, s
);
3982 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3984 return float16_round_pack_canonical(pr
, s
);
3987 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3988 float_status
*s
, int flags
)
3990 FloatParts64 pa
, pb
, *pr
;
3992 bfloat16_unpack_canonical(&pa
, a
, s
);
3993 bfloat16_unpack_canonical(&pb
, b
, s
);
3994 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3996 return bfloat16_round_pack_canonical(pr
, s
);
3999 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
4001 FloatParts64 pa
, pb
, *pr
;
4003 float32_unpack_canonical(&pa
, a
, s
);
4004 float32_unpack_canonical(&pb
, b
, s
);
4005 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
4007 return float32_round_pack_canonical(pr
, s
);
4010 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
4012 FloatParts64 pa
, pb
, *pr
;
4014 float64_unpack_canonical(&pa
, a
, s
);
4015 float64_unpack_canonical(&pb
, b
, s
);
4016 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
4018 return float64_round_pack_canonical(pr
, s
);
4021 static float128
float128_minmax(float128 a
, float128 b
,
4022 float_status
*s
, int flags
)
4024 FloatParts128 pa
, pb
, *pr
;
4026 float128_unpack_canonical(&pa
, a
, s
);
4027 float128_unpack_canonical(&pb
, b
, s
);
4028 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
4030 return float128_round_pack_canonical(pr
, s
);
/*
 * MINMAX_1(type, name, flags) defines the function type##_##name(a, b, s)
 * as a one-line wrapper that calls type##_minmax(a, b, s, flags).
 */
4033 #define MINMAX_1(type, name, flags) \
4034 type type##_##name(type a, type b, float_status *s) \
4035 { return type##_minmax(a, b, s, flags); }
/*
 * MINMAX_2(type) expands MINMAX_1 eight times, producing the max, maxnum,
 * maxnummag, maximum_number, min, minnum, minnummag and minimum_number
 * wrappers for `type', each with its own minmax_* flag combination.
 */
4037 #define MINMAX_2(type) \
4038 MINMAX_1(type, max, 0) \
4039 MINMAX_1(type, maxnum, minmax_isnum) \
4040 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
4041 MINMAX_1(type, maximum_number, minmax_isnumber) \
4042 MINMAX_1(type, min, minmax_ismin) \
4043 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
4044 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4045 MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \
/*
 * Floating point compare
 */
4060 static FloatRelation QEMU_FLATTEN
4061 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
4063 FloatParts64 pa
, pb
;
4065 float16_unpack_canonical(&pa
, a
, s
);
4066 float16_unpack_canonical(&pb
, b
, s
);
4067 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4070 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
4072 return float16_do_compare(a
, b
, s
, false);
4075 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
4077 return float16_do_compare(a
, b
, s
, true);
4080 static FloatRelation QEMU_SOFTFLOAT_ATTR
4081 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
4083 FloatParts64 pa
, pb
;
4085 float32_unpack_canonical(&pa
, a
, s
);
4086 float32_unpack_canonical(&pb
, b
, s
);
4087 return parts_compare(&pa
, &pb
, s
, is_quiet
);
/*
 * float32 comparison with a host-FPU shortcut.  After
 * float32_input_flush2, the host view (.h) of each operand is tested with
 * isgreaterequal/isgreater/isless to return greater, equal or less
 * directly; anything left over is passed to float32_do_compare.
 * NOTE(review): the assignments into ua/ub and the body of the
 * QEMU_NO_HARDFLOAT branch are not visible in this excerpt.
 */
4090 static FloatRelation QEMU_FLATTEN
4091 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
4093 union_float32 ua
, ub
;
4098 if (QEMU_NO_HARDFLOAT
) {
4102 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
/* a >= b: distinguish strictly greater from equal. */
4103 if (isgreaterequal(ua
.h
, ub
.h
)) {
4104 if (isgreater(ua
.h
, ub
.h
)) {
4105 return float_relation_greater
;
4107 return float_relation_equal
;
4109 if (likely(isless(ua
.h
, ub
.h
))) {
4110 return float_relation_less
;
4113 * The only condition remaining is unordered.
4114 * Fall through to set flags.
4117 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4120 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
4122 return float32_hs_compare(a
, b
, s
, false);
4125 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
4127 return float32_hs_compare(a
, b
, s
, true);
4130 static FloatRelation QEMU_SOFTFLOAT_ATTR
4131 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
4133 FloatParts64 pa
, pb
;
4135 float64_unpack_canonical(&pa
, a
, s
);
4136 float64_unpack_canonical(&pb
, b
, s
);
4137 return parts_compare(&pa
, &pb
, s
, is_quiet
);
/*
 * float64 comparison with a host-FPU shortcut.  After
 * float64_input_flush2, the host view (.h) of each operand is tested with
 * isgreaterequal/isgreater/isless to return greater, equal or less
 * directly; anything left over is passed to float64_do_compare.
 * NOTE(review): the assignments into ua/ub and the body of the
 * QEMU_NO_HARDFLOAT branch are not visible in this excerpt.
 */
4140 static FloatRelation QEMU_FLATTEN
4141 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
4143 union_float64 ua
, ub
;
4148 if (QEMU_NO_HARDFLOAT
) {
4152 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
/* a >= b: distinguish strictly greater from equal. */
4153 if (isgreaterequal(ua
.h
, ub
.h
)) {
4154 if (isgreater(ua
.h
, ub
.h
)) {
4155 return float_relation_greater
;
4157 return float_relation_equal
;
4159 if (likely(isless(ua
.h
, ub
.h
))) {
4160 return float_relation_less
;
4163 * The only condition remaining is unordered.
4164 * Fall through to set flags.
4167 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4170 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
4172 return float64_hs_compare(a
, b
, s
, false);
4175 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
4177 return float64_hs_compare(a
, b
, s
, true);
4180 static FloatRelation QEMU_FLATTEN
4181 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
4183 FloatParts64 pa
, pb
;
4185 bfloat16_unpack_canonical(&pa
, a
, s
);
4186 bfloat16_unpack_canonical(&pb
, b
, s
);
4187 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4190 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
4192 return bfloat16_do_compare(a
, b
, s
, false);
4195 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
4197 return bfloat16_do_compare(a
, b
, s
, true);
4200 static FloatRelation QEMU_FLATTEN
4201 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
4203 FloatParts128 pa
, pb
;
4205 float128_unpack_canonical(&pa
, a
, s
);
4206 float128_unpack_canonical(&pb
, b
, s
);
4207 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4210 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
4212 return float128_do_compare(a
, b
, s
, false);
4215 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
4217 return float128_do_compare(a
, b
, s
, true);
4220 static FloatRelation QEMU_FLATTEN
4221 floatx80_do_compare(floatx80 a
, floatx80 b
, float_status
*s
, bool is_quiet
)
4223 FloatParts128 pa
, pb
;
4225 if (!floatx80_unpack_canonical(&pa
, a
, s
) ||
4226 !floatx80_unpack_canonical(&pb
, b
, s
)) {
4227 return float_relation_unordered
;
4229 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4232 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*s
)
4234 return floatx80_do_compare(a
, b
, s
, false);
4237 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
, float_status
*s
)
4239 return floatx80_do_compare(a
, b
, s
, true);
4246 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
4250 float16_unpack_canonical(&p
, a
, status
);
4251 parts_scalbn(&p
, n
, status
);
4252 return float16_round_pack_canonical(&p
, status
);
4255 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
4259 float32_unpack_canonical(&p
, a
, status
);
4260 parts_scalbn(&p
, n
, status
);
4261 return float32_round_pack_canonical(&p
, status
);
4264 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
4268 float64_unpack_canonical(&p
, a
, status
);
4269 parts_scalbn(&p
, n
, status
);
4270 return float64_round_pack_canonical(&p
, status
);
4273 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
4277 bfloat16_unpack_canonical(&p
, a
, status
);
4278 parts_scalbn(&p
, n
, status
);
4279 return bfloat16_round_pack_canonical(&p
, status
);
4282 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
4286 float128_unpack_canonical(&p
, a
, status
);
4287 parts_scalbn(&p
, n
, status
);
4288 return float128_round_pack_canonical(&p
, status
);
4291 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
4295 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
4296 return floatx80_default_nan(status
);
4298 parts_scalbn(&p
, n
, status
);
4299 return floatx80_round_pack_canonical(&p
, status
);
4306 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
4310 float16_unpack_canonical(&p
, a
, status
);
4311 parts_sqrt(&p
, status
, &float16_params
);
4312 return float16_round_pack_canonical(&p
, status
);
4315 static float32 QEMU_SOFTFLOAT_ATTR
4316 soft_f32_sqrt(float32 a
, float_status
*status
)
4320 float32_unpack_canonical(&p
, a
, status
);
4321 parts_sqrt(&p
, status
, &float32_params
);
4322 return float32_round_pack_canonical(&p
, status
);
4325 static float64 QEMU_SOFTFLOAT_ATTR
4326 soft_f64_sqrt(float64 a
, float_status
*status
)
4330 float64_unpack_canonical(&p
, a
, status
);
4331 parts_sqrt(&p
, status
, &float64_params
);
4332 return float64_round_pack_canonical(&p
, status
);
/*
 * float32 square root with a host-FPU shortcut.  The input is passed
 * through float32_input_flush1 and then screened: depending on
 * QEMU_HARDFLOAT_1F32_USE_FP the check uses either fpclassify on the host
 * view (.h) or float32_is_zero_or_normal/float32_is_neg on the soft view
 * (.s).  The final statement hands the value to soft_f32_sqrt.
 * NOTE(review): the assignment into `ua', the branch bodies, the tail of
 * the first condition and the host computation that uses `ur' are not
 * visible in this excerpt.
 */
4335 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
4337 union_float32 ua
, ur
;
4340 if (unlikely(!can_use_fpu(s
))) {
4344 float32_input_flush1(&ua
.s
, s
);
4345 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
4346 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
4347 fpclassify(ua
.h
) == FP_ZERO
) ||
4351 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
4352 float32_is_neg(ua
.s
))) {
4359 return soft_f32_sqrt(ua
.s
, s
);
/*
 * float64 square root with a host-FPU shortcut.  The input is passed
 * through float64_input_flush1 and then screened: depending on
 * QEMU_HARDFLOAT_1F64_USE_FP the check uses either fpclassify on the host
 * view (.h) or float64_is_zero_or_normal/float64_is_neg on the soft view
 * (.s).  The final statement hands the value to soft_f64_sqrt.
 * NOTE(review): the assignment into `ua', the branch bodies, the tail of
 * the first condition and the host computation that uses `ur' are not
 * visible in this excerpt.
 */
4362 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
4364 union_float64 ua
, ur
;
4367 if (unlikely(!can_use_fpu(s
))) {
4371 float64_input_flush1(&ua
.s
, s
);
4372 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
4373 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
4374 fpclassify(ua
.h
) == FP_ZERO
) ||
4378 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
4379 float64_is_neg(ua
.s
))) {
4386 return soft_f64_sqrt(ua
.s
, s
);
4389 float64
float64r32_sqrt(float64 a
, float_status
*status
)
4393 float64_unpack_canonical(&p
, a
, status
);
4394 parts_sqrt(&p
, status
, &float64_params
);
4395 return float64r32_round_pack_canonical(&p
, status
);
4398 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
4402 bfloat16_unpack_canonical(&p
, a
, status
);
4403 parts_sqrt(&p
, status
, &bfloat16_params
);
4404 return bfloat16_round_pack_canonical(&p
, status
);
4407 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
4411 float128_unpack_canonical(&p
, a
, status
);
4412 parts_sqrt(&p
, status
, &float128_params
);
4413 return float128_round_pack_canonical(&p
, status
);
4416 floatx80
floatx80_sqrt(floatx80 a
, float_status
*s
)
4420 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
4421 return floatx80_default_nan(s
);
4423 parts_sqrt(&p
, s
, &floatx80_params
[s
->floatx80_rounding_precision
]);
4424 return floatx80_round_pack_canonical(&p
, s
);
4430 float32
float32_log2(float32 a
, float_status
*status
)
4434 float32_unpack_canonical(&p
, a
, status
);
4435 parts_log2(&p
, status
, &float32_params
);
4436 return float32_round_pack_canonical(&p
, status
);
4439 float64
float64_log2(float64 a
, float_status
*status
)
4443 float64_unpack_canonical(&p
, a
, status
);
4444 parts_log2(&p
, status
, &float64_params
);
4445 return float64_round_pack_canonical(&p
, status
);
4448 /*----------------------------------------------------------------------------
4449 | The pattern for a default generated NaN.
4450 *----------------------------------------------------------------------------*/
4452 float16
float16_default_nan(float_status
*status
)
4456 parts_default_nan(&p
, status
);
4457 p
.frac
>>= float16_params
.frac_shift
;
4458 return float16_pack_raw(&p
);
4461 float32
float32_default_nan(float_status
*status
)
4465 parts_default_nan(&p
, status
);
4466 p
.frac
>>= float32_params
.frac_shift
;
4467 return float32_pack_raw(&p
);
4470 float64
float64_default_nan(float_status
*status
)
4474 parts_default_nan(&p
, status
);
4475 p
.frac
>>= float64_params
.frac_shift
;
4476 return float64_pack_raw(&p
);
4479 float128
float128_default_nan(float_status
*status
)
4483 parts_default_nan(&p
, status
);
4484 frac_shr(&p
, float128_params
.frac_shift
);
4485 return float128_pack_raw(&p
);
4488 bfloat16
bfloat16_default_nan(float_status
*status
)
4492 parts_default_nan(&p
, status
);
4493 p
.frac
>>= bfloat16_params
.frac_shift
;
4494 return bfloat16_pack_raw(&p
);
4497 /*----------------------------------------------------------------------------
4498 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4499 *----------------------------------------------------------------------------*/
4501 float16
float16_silence_nan(float16 a
, float_status
*status
)
4505 float16_unpack_raw(&p
, a
);
4506 p
.frac
<<= float16_params
.frac_shift
;
4507 parts_silence_nan(&p
, status
);
4508 p
.frac
>>= float16_params
.frac_shift
;
4509 return float16_pack_raw(&p
);
4512 float32
float32_silence_nan(float32 a
, float_status
*status
)
4516 float32_unpack_raw(&p
, a
);
4517 p
.frac
<<= float32_params
.frac_shift
;
4518 parts_silence_nan(&p
, status
);
4519 p
.frac
>>= float32_params
.frac_shift
;
4520 return float32_pack_raw(&p
);
4523 float64
float64_silence_nan(float64 a
, float_status
*status
)
4527 float64_unpack_raw(&p
, a
);
4528 p
.frac
<<= float64_params
.frac_shift
;
4529 parts_silence_nan(&p
, status
);
4530 p
.frac
>>= float64_params
.frac_shift
;
4531 return float64_pack_raw(&p
);
4534 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
4538 bfloat16_unpack_raw(&p
, a
);
4539 p
.frac
<<= bfloat16_params
.frac_shift
;
4540 parts_silence_nan(&p
, status
);
4541 p
.frac
>>= bfloat16_params
.frac_shift
;
4542 return bfloat16_pack_raw(&p
);
4545 float128
float128_silence_nan(float128 a
, float_status
*status
)
4549 float128_unpack_raw(&p
, a
);
4550 frac_shl(&p
, float128_params
.frac_shift
);
4551 parts_silence_nan(&p
, status
);
4552 frac_shr(&p
, float128_params
.frac_shift
);
4553 return float128_pack_raw(&p
);
4556 /*----------------------------------------------------------------------------
4557 | If `a' is denormal and we are in flush-to-zero mode then set the
4558 | input-denormal exception and return zero. Otherwise just return the value.
4559 *----------------------------------------------------------------------------*/
/*
 * Helper for the *_squash_input_denormal functions: when the raw parts
 * have a zero exponent and a non-zero fraction, float_flag_input_denormal
 * is raised.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers treat a true result as "input was denormal".
 */
4561 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
4563 if (p
.exp
== 0 && p
.frac
!= 0) {
4564 float_raise(float_flag_input_denormal
, status
);
4571 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
4573 if (status
->flush_inputs_to_zero
) {
4576 float16_unpack_raw(&p
, a
);
4577 if (parts_squash_denormal(p
, status
)) {
4578 return float16_set_sign(float16_zero
, p
.sign
);
4584 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4586 if (status
->flush_inputs_to_zero
) {
4589 float32_unpack_raw(&p
, a
);
4590 if (parts_squash_denormal(p
, status
)) {
4591 return float32_set_sign(float32_zero
, p
.sign
);
4597 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4599 if (status
->flush_inputs_to_zero
) {
4602 float64_unpack_raw(&p
, a
);
4603 if (parts_squash_denormal(p
, status
)) {
4604 return float64_set_sign(float64_zero
, p
.sign
);
4610 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4612 if (status
->flush_inputs_to_zero
) {
4615 bfloat16_unpack_raw(&p
, a
);
4616 if (parts_squash_denormal(p
, status
)) {
4617 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4623 /*----------------------------------------------------------------------------
4624 | Normalizes the subnormal extended double-precision floating-point value
4625 | represented by the denormalized significand `aSig'. The normalized exponent
4626 | and significand are stored at the locations pointed to by `zExpPtr' and
4627 | `zSigPtr', respectively.
4628 *----------------------------------------------------------------------------*/
/*
 * Shift `aSig' left by its leading-zero count (clz64) and store the result
 * through zSigPtr; the matching exponent, 1 - shiftCount, is stored
 * through zExpPtr.
 * NOTE(review): the end of the parameter list and the declaration of
 * shiftCount are not visible in this excerpt.
 */
4630 void normalizeFloatx80Subnormal(uint64_t aSig
, int32_t *zExpPtr
,
4635 shiftCount
= clz64(aSig
);
4636 *zSigPtr
= aSig
<<shiftCount
;
4637 *zExpPtr
= 1 - shiftCount
;
4640 /*----------------------------------------------------------------------------
4641 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4642 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4643 | and returns the proper extended double-precision floating-point value
4644 | corresponding to the abstract input. Ordinarily, the abstract value is
4645 | rounded and packed into the extended double-precision format, with the
4646 | inexact exception raised if the abstract input cannot be represented
4647 | exactly. However, if the abstract value is too large, the overflow and
4648 | inexact exceptions are raised and an infinity or maximal finite value is
4649 | returned. If the abstract value is too small, the input value is rounded to
4650 | a subnormal number, and the underflow and inexact exceptions are raised if
4651 | the abstract input cannot be represented exactly as a subnormal extended
4652 | double-precision floating-point number.
4653 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4654 | the result is rounded to the same number of bits as single or double
4655 | precision, respectively. Otherwise, the result is rounded to the full
4656 | precision of the extended double-precision format.
4657 | The input significand must be normalized or smaller. If the input
4658 | significand is not normalized, `zExp' must be 0; in that case, the result
4659 | returned is a subnormal number, and it must not require rounding. The
4660 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4661 | Floating-Point Arithmetic.
4662 *----------------------------------------------------------------------------*/
/*
 * Round the abstract value (zSign, zExp, zSig0:zSig1) and pack it as a
 * floatx80.  The function has two halves: reduced precision
 * (floatx80_precision_d / floatx80_precision_s), which rounds inside zSig0
 * using roundMask/roundIncrement, and full precision
 * (floatx80_precision_x), which rounds on zSig1 via `increment'.
 * NOTE(review): several control-flow lines (break/goto/labels, some
 * conditions and simple assignments) are not visible in this excerpt.
 */
4664 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4665 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4666 float_status
*status
)
4668 FloatRoundMode roundingMode
;
4669 bool roundNearestEven
, increment
, isTiny
;
4670 int64_t roundIncrement
, roundMask
, roundBits
;
4672 roundingMode
= status
->float_rounding_mode
;
4673 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
/* Select the rounding position inside zSig0 for reduced precisions. */
4674 switch (roundingPrecision
) {
4675 case floatx80_precision_x
:
4677 case floatx80_precision_d
:
/* Mask 0x7FF covers the low 11 bits; the increment is half of mask + 1. */
4678 roundIncrement
= UINT64_C(0x0000000000000400);
4679 roundMask
= UINT64_C(0x00000000000007FF);
4681 case floatx80_precision_s
:
/* Mask covers the low 40 bits; the increment is half of mask + 1. */
4682 roundIncrement
= UINT64_C(0x0000008000000000);
4683 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4686 g_assert_not_reached();
/* Fold any non-zero zSig1 into the lowest bit of zSig0. */
4688 zSig0
|= ( zSig1
!= 0 );
/* Adjust the increment for the directed rounding modes. */
4689 switch (roundingMode
) {
4690 case float_round_nearest_even
:
4691 case float_round_ties_away
:
4693 case float_round_to_zero
:
4696 case float_round_up
:
4697 roundIncrement
= zSign
? 0 : roundMask
;
4699 case float_round_down
:
4700 roundIncrement
= zSign
? roundMask
: 0;
4705 roundBits
= zSig0
& roundMask
;
/* The unsigned compare catches both zExp <= 0 and zExp >= 0x7FFE. */
4706 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4707 if ( ( 0x7FFE < zExp
)
4708 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
/* Flush-to-zero: flag the output denormal and return a signed zero. */
4713 if (status
->flush_to_zero
) {
4714 float_raise(float_flag_output_denormal
, status
);
4715 return packFloatx80(zSign
, 0, 0);
4717 isTiny
= status
->tininess_before_rounding
4719 || (zSig0
<= zSig0
+ roundIncrement
);
4720 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4722 roundBits
= zSig0
& roundMask
;
4723 if (isTiny
&& roundBits
) {
4724 float_raise(float_flag_underflow
, status
);
4727 float_raise(float_flag_inexact
, status
);
4729 zSig0
+= roundIncrement
;
4730 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
/* On an exact tie in nearest-even mode, also clear the bit above the mask. */
4731 roundIncrement
= roundMask
+ 1;
4732 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4733 roundMask
|= roundIncrement
;
4735 zSig0
&= ~ roundMask
;
4736 return packFloatx80( zSign
, zExp
, zSig0
);
4740 float_raise(float_flag_inexact
, status
);
4742 zSig0
+= roundIncrement
;
/* Unsigned wrap-around here means the addition carried out of bit 63. */
4743 if ( zSig0
< roundIncrement
) {
4745 zSig0
= UINT64_C(0x8000000000000000);
4747 roundIncrement
= roundMask
+ 1;
4748 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4749 roundMask
|= roundIncrement
;
4751 zSig0
&= ~ roundMask
;
4752 if ( zSig0
== 0 ) zExp
= 0;
4753 return packFloatx80( zSign
, zExp
, zSig0
);
/* Full-precision half: decide whether to increment zSig0 from zSig1. */
4755 switch (roundingMode
) {
4756 case float_round_nearest_even
:
4757 case float_round_ties_away
:
4758 increment
= ((int64_t)zSig1
< 0);
4760 case float_round_to_zero
:
4763 case float_round_up
:
4764 increment
= !zSign
&& zSig1
;
4766 case float_round_down
:
4767 increment
= zSign
&& zSig1
;
4772 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4773 if ( ( 0x7FFE < zExp
)
4774 || ( ( zExp
== 0x7FFE )
4775 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
/*
 * Overflow: raise overflow and inexact, then return either the largest
 * finite value (exponent 0x7FFE, significand ~roundMask) or infinity,
 * depending on rounding mode and sign.
 */
4781 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4782 if ( ( roundingMode
== float_round_to_zero
)
4783 || ( zSign
&& ( roundingMode
== float_round_up
) )
4784 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4786 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4788 return packFloatx80(zSign
,
4789 floatx80_infinity_high
,
4790 floatx80_infinity_low
);
4793 isTiny
= status
->tininess_before_rounding
4796 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4797 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4799 if (isTiny
&& zSig1
) {
4800 float_raise(float_flag_underflow
, status
);
4803 float_raise(float_flag_inexact
, status
);
/* Recompute `increment' from the shifted zSig1. */
4805 switch (roundingMode
) {
4806 case float_round_nearest_even
:
4807 case float_round_ties_away
:
4808 increment
= ((int64_t)zSig1
< 0);
4810 case float_round_to_zero
:
4813 case float_round_up
:
4814 increment
= !zSign
&& zSig1
;
4816 case float_round_down
:
4817 increment
= zSign
&& zSig1
;
/* (zSig1 << 1) == 0 means no bits are set below the top bit of zSig1. */
4824 if (!(zSig1
<< 1) && roundNearestEven
) {
4827 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4829 return packFloatx80( zSign
, zExp
, zSig0
);
4833 float_raise(float_flag_inexact
, status
);
4839 zSig0
= UINT64_C(0x8000000000000000);
4842 if (!(zSig1
<< 1) && roundNearestEven
) {
4848 if ( zSig0
== 0 ) zExp
= 0;
4850 return packFloatx80( zSign
, zExp
, zSig0
);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/
/*
 * Normalize the 128-bit significand zSig0:zSig1 by shifting it left by the
 * leading-zero count of zSig0, then hand everything to
 * roundAndPackFloatx80.
 * NOTE(review): the declaration of shiftCount and any adjustments to zExp
 * or handling of zSig0 == 0 are not visible in this excerpt.
 */
4863 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4864 bool zSign
, int32_t zExp
,
4865 uint64_t zSig0
, uint64_t zSig1
,
4866 float_status
*status
)
4875 shiftCount
= clz64(zSig0
);
4876 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4878 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4879 zSig0
, zSig1
, status
);
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/
/*
 * Series coefficients read by float32_exp2, stored as float64 bit
 * patterns.  The trailing comment on each entry is n; the leading entries
 * decode to 1/n! (1, 1/2, 1/6, 1/24, ...).
 */
4901 static const float64 float32_exp2_coefficients
[15] =
4903 const_float64( 0x3ff0000000000000ll
), /* 1 */
4904 const_float64( 0x3fe0000000000000ll
), /* 2 */
4905 const_float64( 0x3fc5555555555555ll
), /* 3 */
4906 const_float64( 0x3fa5555555555555ll
), /* 4 */
4907 const_float64( 0x3f81111111111111ll
), /* 5 */
4908 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
4909 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
4910 const_float64( 0x3efa01a01a01a01all
), /* 8 */
4911 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
4912 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
4913 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
4914 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
4915 const_float64( 0x3de6124613a86d09ll
), /* 13 */
4916 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
4917 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
4920 float32
float32_exp2(float32 a
, float_status
*status
)
4922 FloatParts64 xp
, xnp
, tp
, rp
;
4925 float32_unpack_canonical(&xp
, a
, status
);
4926 if (unlikely(xp
.cls
!= float_class_normal
)) {
4928 case float_class_snan
:
4929 case float_class_qnan
:
4930 parts_return_nan(&xp
, status
);
4931 return float32_round_pack_canonical(&xp
, status
);
4932 case float_class_inf
:
4933 return xp
.sign
? float32_zero
: a
;
4934 case float_class_zero
:
4939 g_assert_not_reached();
4942 float_raise(float_flag_inexact
, status
);
4944 float64_unpack_canonical(&tp
, float64_ln2
, status
);
4945 xp
= *parts_mul(&xp
, &tp
, status
);
4948 float64_unpack_canonical(&rp
, float64_one
, status
);
4949 for (i
= 0 ; i
< 15 ; i
++) {
4950 float64_unpack_canonical(&tp
, float32_exp2_coefficients
[i
], status
);
4951 rp
= *parts_muladd(&tp
, &xp
, &rp
, 0, status
);
4952 xnp
= *parts_mul(&xnp
, &xp
, status
);
4955 return float32_round_pack_canonical(&rp
, status
);
4958 /*----------------------------------------------------------------------------
4959 | Rounds the extended double-precision floating-point value `a'
4960 | to the precision provided by floatx80_rounding_precision and returns the
4961 | result as an extended double-precision floating-point value.
4962 | The operation is performed according to the IEC/IEEE Standard for Binary
4963 | Floating-Point Arithmetic.
4964 *----------------------------------------------------------------------------*/
4966 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
4970 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
4971 return floatx80_default_nan(status
);
4973 return floatx80_round_pack_canonical(&p
, status
);
4976 static void __attribute__((constructor
)) softfloat_init(void)
4978 union_float64 ua
, ub
, uc
, ur
;
4980 if (QEMU_NO_HARDFLOAT
) {
4984 * Test that the host's FMA is not obviously broken. For example,
4985 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4986 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4988 ua
.s
= 0x0020000000000001ULL
;
4989 ub
.s
= 0x3ca0000000000000ULL
;
4990 uc
.s
= 0x0020000000000000ULL
;
4991 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
4992 if (ur
.s
!= 0x0020000000000001ULL
) {
4993 force_soft_fma
= true;