4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the
 * generated hardfloat functions. Each combination of number of inputs and
 * float size gets its own value.
 * NOTE(review): the #else/#endif below were missing from the mangled source
 * and have been restored to match the visible #if structure.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 * NOTE(review): the continuation of the #warning message and the
 * #else/#endif lines were missing from the mangled source; restored to the
 * conventional form — confirm against upstream.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 * NOTE(review): the zero/normal/inf/snan enumerators were missing from the
 * mangled source and restored in the order implied by the cmask enum below.
 */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Bit-mask form of a FloatClass, so several classes can be tested at once. */
#define float_cmask(bit) (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
431 /* Flags for parts_minmax. */
433 /* Set for minimum; clear for maximum. */
435 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
437 /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
441 /* Simple helpers for checking if, or what kind of, NaN we have */
442 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
444 return unlikely(c
>= float_class_qnan
);
447 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
449 return c
== float_class_snan
;
452 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
454 return c
== float_class_qnan
;
458 * Structure holding all of the decomposed parts of a float.
459 * The exponent is unbiased and the fraction is normalized.
461 * The fraction words are stored in big-endian word ordering,
462 * so that truncation from a larger format to a smaller format
463 * can be done simply by ignoring subsequent elements.
471 /* Routines that know the structure may reference the singular name. */
474 * Routines expanded with multiple structures reference "hi" and "lo"
475 * depending on the operation. In FloatParts64, "hi" and "lo" are
476 * both the same word and aliased here.
496 uint64_t frac_hm
; /* high-middle */
497 uint64_t frac_lm
; /* low-middle */
501 /* These apply to the most significant word of each FloatPartsN. */
502 #define DECOMPOSED_BINARY_POINT 63
503 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
505 /* Structure holding all of the relevant parameters for a format.
506 * exp_size: the size of the exponent field
507 * exp_bias: the offset applied to the exponent field
508 * exp_max: the maximum normalised exponent
509 * frac_size: the size of the fraction field
510 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
511 * The following are computed based the size of fraction
512 * round_mask: bits below lsb which must be rounded
513 * The following optional modifiers are available:
514 * arm_althp: handle ARM Alternative Half Precision
526 /* Expand fields based on the size of exponent and fraction */
527 #define FLOAT_PARAMS_(E) \
529 .exp_bias = ((1 << E) - 1) >> 1, \
530 .exp_max = (1 << E) - 1
532 #define FLOAT_PARAMS(E, F) \
535 .frac_shift = (-F - 1) & 63, \
536 .round_mask = (1ull << ((-F - 1) & 63)) - 1
538 static const FloatFmt float16_params
= {
542 static const FloatFmt float16_params_ahp
= {
547 static const FloatFmt bfloat16_params
= {
551 static const FloatFmt float32_params
= {
555 static const FloatFmt float64_params
= {
559 static const FloatFmt float128_params
= {
560 FLOAT_PARAMS(15, 112)
563 #define FLOATX80_PARAMS(R) \
565 .frac_size = R == 64 ? 63 : R, \
567 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
569 static const FloatFmt floatx80_params
[3] = {
570 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
571 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
572 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
575 /* Unpack a float to parts, but do not canonicalize. */
576 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
578 const int f_size
= fmt
->frac_size
;
579 const int e_size
= fmt
->exp_size
;
581 *r
= (FloatParts64
) {
582 .cls
= float_class_unclassified
,
583 .sign
= extract64(raw
, f_size
+ e_size
, 1),
584 .exp
= extract64(raw
, f_size
, e_size
),
585 .frac
= extract64(raw
, 0, f_size
)
589 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
591 unpack_raw64(p
, &float16_params
, f
);
594 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
596 unpack_raw64(p
, &bfloat16_params
, f
);
599 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
601 unpack_raw64(p
, &float32_params
, f
);
604 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
606 unpack_raw64(p
, &float64_params
, f
);
609 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
611 *p
= (FloatParts128
) {
612 .cls
= float_class_unclassified
,
613 .sign
= extract32(f
.high
, 15, 1),
614 .exp
= extract32(f
.high
, 0, 15),
619 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
621 const int f_size
= float128_params
.frac_size
- 64;
622 const int e_size
= float128_params
.exp_size
;
624 *p
= (FloatParts128
) {
625 .cls
= float_class_unclassified
,
626 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
627 .exp
= extract64(f
.high
, f_size
, e_size
),
628 .frac_hi
= extract64(f
.high
, 0, f_size
),
633 /* Pack a float from parts, but do not canonicalize. */
634 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
636 const int f_size
= fmt
->frac_size
;
637 const int e_size
= fmt
->exp_size
;
640 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
641 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
642 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
646 static inline float16
float16_pack_raw(const FloatParts64
*p
)
648 return make_float16(pack_raw64(p
, &float16_params
));
651 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
653 return pack_raw64(p
, &bfloat16_params
);
656 static inline float32
float32_pack_raw(const FloatParts64
*p
)
658 return make_float32(pack_raw64(p
, &float32_params
));
661 static inline float64
float64_pack_raw(const FloatParts64
*p
)
663 return make_float64(pack_raw64(p
, &float64_params
));
666 static float128
float128_pack_raw(const FloatParts128
*p
)
668 const int f_size
= float128_params
.frac_size
- 64;
669 const int e_size
= float128_params
.exp_size
;
672 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
673 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
674 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
675 return make_float128(hi
, p
->frac_lo
);
678 /*----------------------------------------------------------------------------
679 | Functions and definitions to determine: (1) whether tininess for underflow
680 | is detected before or after rounding by default, (2) what (if anything)
681 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
682 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
683 | are propagated from function inputs to output. These details are target-
685 *----------------------------------------------------------------------------*/
686 #include "softfloat-specialize.c.inc"
688 #define PARTS_GENERIC_64_128(NAME, P) \
689 _Generic((P), FloatParts64 *: parts64_##NAME, \
690 FloatParts128 *: parts128_##NAME)
692 #define PARTS_GENERIC_64_128_256(NAME, P) \
693 _Generic((P), FloatParts64 *: parts64_##NAME, \
694 FloatParts128 *: parts128_##NAME, \
695 FloatParts256 *: parts256_##NAME)
697 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
698 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
700 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
701 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
703 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
705 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
707 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
710 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
712 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
713 FloatParts64
*c
, float_status
*s
,
714 int ab_mask
, int abc_mask
);
715 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
719 int ab_mask
, int abc_mask
);
721 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
722 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
724 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
725 const FloatFmt
*fmt
);
726 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
727 const FloatFmt
*fmt
);
729 #define parts_canonicalize(A, S, F) \
730 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
732 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
733 const FloatFmt
*fmt
);
734 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
735 const FloatFmt
*fmt
);
737 #define parts_uncanon_normal(A, S, F) \
738 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
740 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
741 const FloatFmt
*fmt
);
742 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
743 const FloatFmt
*fmt
);
745 #define parts_uncanon(A, S, F) \
746 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
748 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
749 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
750 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
752 #define parts_add_normal(A, B) \
753 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
755 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
756 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
757 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
759 #define parts_sub_normal(A, B) \
760 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
762 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
763 float_status
*s
, bool subtract
);
764 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
765 float_status
*s
, bool subtract
);
767 #define parts_addsub(A, B, S, Z) \
768 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
770 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
772 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
775 #define parts_mul(A, B, S) \
776 PARTS_GENERIC_64_128(mul, A)(A, B, S)
778 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
779 FloatParts64
*c
, int flags
,
781 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
782 FloatParts128
*c
, int flags
,
785 #define parts_muladd(A, B, C, Z, S) \
786 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
788 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
790 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
793 #define parts_div(A, B, S) \
794 PARTS_GENERIC_64_128(div, A)(A, B, S)
796 static FloatParts64
*parts64_modrem(FloatParts64
*a
, FloatParts64
*b
,
797 uint64_t *mod_quot
, float_status
*s
);
798 static FloatParts128
*parts128_modrem(FloatParts128
*a
, FloatParts128
*b
,
799 uint64_t *mod_quot
, float_status
*s
);
801 #define parts_modrem(A, B, Q, S) \
802 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
804 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
805 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
807 #define parts_sqrt(A, S, F) \
808 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
810 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
811 int scale
, int frac_size
);
812 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
813 int scale
, int frac_size
);
815 #define parts_round_to_int_normal(A, R, C, F) \
816 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
818 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
819 int scale
, float_status
*s
,
820 const FloatFmt
*fmt
);
821 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
822 int scale
, float_status
*s
,
823 const FloatFmt
*fmt
);
825 #define parts_round_to_int(A, R, C, S, F) \
826 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
828 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
829 int scale
, int64_t min
, int64_t max
,
831 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
832 int scale
, int64_t min
, int64_t max
,
835 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
836 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
838 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
839 int scale
, uint64_t max
,
841 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
842 int scale
, uint64_t max
,
845 #define parts_float_to_uint(P, R, Z, M, S) \
846 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
848 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
849 int scale
, float_status
*s
);
850 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
851 int scale
, float_status
*s
);
853 #define parts_sint_to_float(P, I, Z, S) \
854 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
856 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
857 int scale
, float_status
*s
);
858 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
859 int scale
, float_status
*s
);
861 #define parts_uint_to_float(P, I, Z, S) \
862 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
864 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
865 float_status
*s
, int flags
);
866 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
867 float_status
*s
, int flags
);
869 #define parts_minmax(A, B, S, F) \
870 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
872 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
873 float_status
*s
, bool q
);
874 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
875 float_status
*s
, bool q
);
877 #define parts_compare(A, B, S, Q) \
878 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
880 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
881 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
883 #define parts_scalbn(A, N, S) \
884 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
886 static void parts64_log2(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
887 static void parts128_log2(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
889 #define parts_log2(A, S, F) \
890 PARTS_GENERIC_64_128(log2, A)(A, S, F)
893 * Helper functions for softfloat-parts.c.inc, per-size operations.
896 #define FRAC_GENERIC_64_128(NAME, P) \
897 _Generic((P), FloatParts64 *: frac64_##NAME, \
898 FloatParts128 *: frac128_##NAME)
900 #define FRAC_GENERIC_64_128_256(NAME, P) \
901 _Generic((P), FloatParts64 *: frac64_##NAME, \
902 FloatParts128 *: frac128_##NAME, \
903 FloatParts256 *: frac256_##NAME)
905 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
907 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
910 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
913 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
914 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
918 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
921 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
922 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
923 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
924 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
928 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
930 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
932 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
935 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
937 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
938 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
941 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
943 static void frac64_allones(FloatParts64
*a
)
948 static void frac128_allones(FloatParts128
*a
)
950 a
->frac_hi
= a
->frac_lo
= -1;
953 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
955 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
957 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
960 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
962 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
964 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
969 return ta
< tb
? -1 : 1;
972 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
974 static void frac64_clear(FloatParts64
*a
)
979 static void frac128_clear(FloatParts128
*a
)
981 a
->frac_hi
= a
->frac_lo
= 0;
984 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
986 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
988 uint64_t n1
, n0
, r
, q
;
992 * We want a 2*N / N-bit division to produce exactly an N-bit
993 * result, so that we do not lose any precision and so that we
994 * do not have to renormalize afterward. If A.frac < B.frac,
995 * then division would produce an (N-1)-bit result; shift A left
996 * by one to produce the an N-bit result, and return true to
997 * decrement the exponent to match.
999 * The udiv_qrnnd algorithm that we're using requires normalization,
1000 * i.e. the msb of the denominator must be set, which is already true.
1002 ret
= a
->frac
< b
->frac
;
1010 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1012 /* Set lsb if there is a remainder, to set inexact. */
1013 a
->frac
= q
| (r
!= 0);
1018 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1020 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1021 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1024 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1025 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1027 ret
= lt128(a0
, a1
, b0
, b1
);
1029 a1
= shr_double(a0
, a1
, 1);
1033 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1034 q0
= estimateDiv128To64(a0
, a1
, b0
);
1037 * Estimate is high because B1 was not included (unless B1 == 0).
1038 * Reduce quotient and increase remainder until remainder is non-negative.
1039 * This loop will execute 0 to 2 times.
1041 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1042 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1045 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1048 /* Repeat using the remainder, producing a second word of quotient. */
1049 q1
= estimateDiv128To64(r1
, r2
, b0
);
1050 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1051 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1054 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1057 /* Any remainder indicates inexact; set sticky bit. */
1058 q1
|= (r2
| r3
) != 0;
1065 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1067 static bool frac64_eqz(FloatParts64
*a
)
1069 return a
->frac
== 0;
1072 static bool frac128_eqz(FloatParts128
*a
)
1074 return (a
->frac_hi
| a
->frac_lo
) == 0;
1077 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1079 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1081 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1084 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1086 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1087 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1090 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1092 static void frac64_neg(FloatParts64
*a
)
1097 static void frac128_neg(FloatParts128
*a
)
1100 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1101 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1104 static void frac256_neg(FloatParts256
*a
)
1107 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1108 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1109 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1110 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1113 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1115 static int frac64_normalize(FloatParts64
*a
)
1118 int shift
= clz64(a
->frac
);
1125 static int frac128_normalize(FloatParts128
*a
)
1128 int shl
= clz64(a
->frac_hi
);
1129 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1132 } else if (a
->frac_lo
) {
1133 int shl
= clz64(a
->frac_lo
);
1134 a
->frac_hi
= a
->frac_lo
<< shl
;
1141 static int frac256_normalize(FloatParts256
*a
)
1143 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1144 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1156 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1159 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1162 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1165 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1175 a0
= shl_double(a0
, a1
, shl
);
1176 a1
= shl_double(a1
, a2
, shl
);
1177 a2
= shl_double(a2
, a3
, shl
);
1188 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1190 static void frac64_modrem(FloatParts64
*a
, FloatParts64
*b
, uint64_t *mod_quot
)
1192 uint64_t a0
, a1
, b0
, t0
, t1
, q
, quot
;
1193 int exp_diff
= a
->exp
- b
->exp
;
1199 if (exp_diff
< -1) {
1205 if (exp_diff
== -1) {
1211 quot
= q
= b0
<= a0
;
1217 while (exp_diff
> 0) {
1218 q
= estimateDiv128To64(a0
, a1
, b0
);
1219 q
= q
> 2 ? q
- 2 : 0;
1220 mul64To128(b0
, q
, &t0
, &t1
);
1221 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1222 shortShift128Left(a0
, a1
, 62, &a0
, &a1
);
1224 quot
= (quot
<< 62) + q
;
1229 q
= estimateDiv128To64(a0
, a1
, b0
);
1230 q
= q
> 2 ? (q
- 2) >> (64 - exp_diff
) : 0;
1231 mul64To128(b0
, q
<< (64 - exp_diff
), &t0
, &t1
);
1232 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1233 shortShift128Left(0, b0
, 64 - exp_diff
, &t0
, &t1
);
1234 while (le128(t0
, t1
, a0
, a1
)) {
1236 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1238 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1247 sub128(t0
, t1
, a0
, a1
, &t0
, &t1
);
1248 if (lt128(t0
, t1
, a0
, a1
) ||
1249 (eq128(t0
, t1
, a0
, a1
) && (q
& 1))) {
1258 shortShift128Left(a0
, a1
, shift
, &a0
, &a1
);
1259 } else if (likely(a1
)) {
1265 a
->cls
= float_class_zero
;
1269 a
->exp
= b
->exp
+ exp_diff
- shift
;
1270 a
->frac
= a0
| (a1
!= 0);
1273 static void frac128_modrem(FloatParts128
*a
, FloatParts128
*b
,
1276 uint64_t a0
, a1
, a2
, b0
, b1
, t0
, t1
, t2
, q
, quot
;
1277 int exp_diff
= a
->exp
- b
->exp
;
1284 if (exp_diff
< -1) {
1290 if (exp_diff
== -1) {
1291 shift128Right(a0
, a1
, 1, &a0
, &a1
);
1298 quot
= q
= le128(b0
, b1
, a0
, a1
);
1300 sub128(a0
, a1
, b0
, b1
, &a0
, &a1
);
1304 while (exp_diff
> 0) {
1305 q
= estimateDiv128To64(a0
, a1
, b0
);
1306 q
= q
> 4 ? q
- 4 : 0;
1307 mul128By64To192(b0
, b1
, q
, &t0
, &t1
, &t2
);
1308 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1309 shortShift192Left(a0
, a1
, a2
, 61, &a0
, &a1
, &a2
);
1311 quot
= (quot
<< 61) + q
;
1316 q
= estimateDiv128To64(a0
, a1
, b0
);
1317 q
= q
> 4 ? (q
- 4) >> (64 - exp_diff
) : 0;
1318 mul128By64To192(b0
, b1
, q
<< (64 - exp_diff
), &t0
, &t1
, &t2
);
1319 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1320 shortShift192Left(0, b0
, b1
, 64 - exp_diff
, &t0
, &t1
, &t2
);
1321 while (le192(t0
, t1
, t2
, a0
, a1
, a2
)) {
1323 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1325 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1335 sub192(t0
, t1
, t2
, a0
, a1
, a2
, &t0
, &t1
, &t2
);
1336 if (lt192(t0
, t1
, t2
, a0
, a1
, a2
) ||
1337 (eq192(t0
, t1
, t2
, a0
, a1
, a2
) && (q
& 1))) {
1347 shortShift192Left(a0
, a1
, a2
, shift
, &a0
, &a1
, &a2
);
1348 } else if (likely(a1
)) {
1350 shortShift128Left(a1
, a2
, shift
, &a0
, &a1
);
1353 } else if (likely(a2
)) {
1359 a
->cls
= float_class_zero
;
1363 a
->exp
= b
->exp
+ exp_diff
- shift
;
1365 a
->frac_lo
= a1
| (a2
!= 0);
1368 #define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1370 static void frac64_shl(FloatParts64
*a
, int c
)
1375 static void frac128_shl(FloatParts128
*a
, int c
)
1377 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1385 a0
= shl_double(a0
, a1
, c
);
1393 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1395 static void frac64_shr(FloatParts64
*a
, int c
)
1400 static void frac128_shr(FloatParts128
*a
, int c
)
1402 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1410 a1
= shr_double(a0
, a1
, c
);
1418 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1420 static void frac64_shrjam(FloatParts64
*a
, int c
)
1422 uint64_t a0
= a
->frac
;
1424 if (likely(c
!= 0)) {
1425 if (likely(c
< 64)) {
1426 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1434 static void frac128_shrjam(FloatParts128
*a
, int c
)
1436 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1437 uint64_t sticky
= 0;
1439 if (unlikely(c
== 0)) {
1441 } else if (likely(c
< 64)) {
1443 } else if (likely(c
< 128)) {
1457 sticky
|= shr_double(a1
, 0, c
);
1458 a1
= shr_double(a0
, a1
, c
);
1462 a
->frac_lo
= a1
| (sticky
!= 0);
1466 static void frac256_shrjam(FloatParts256
*a
, int c
)
1468 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1469 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1470 uint64_t sticky
= 0;
1472 if (unlikely(c
== 0)) {
1474 } else if (likely(c
< 64)) {
1476 } else if (likely(c
< 256)) {
1477 if (unlikely(c
& 128)) {
1479 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1481 if (unlikely(c
& 64)) {
1483 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1490 sticky
= a0
| a1
| a2
| a3
;
1491 a0
= a1
= a2
= a3
= 0;
1495 sticky
|= shr_double(a3
, 0, c
);
1496 a3
= shr_double(a2
, a3
, c
);
1497 a2
= shr_double(a1
, a2
, c
);
1498 a1
= shr_double(a0
, a1
, c
);
1502 a
->frac_lo
= a3
| (sticky
!= 0);
1508 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1510 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1512 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1515 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1518 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1519 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1523 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1526 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1527 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1528 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1529 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1533 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1535 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1537 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1540 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1542 r
->frac_hi
= a
->frac_hi
;
1543 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1546 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1548 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1550 r
->frac_hi
= a
->frac
;
1554 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1556 r
->frac_hi
= a
->frac_hi
;
1557 r
->frac_hm
= a
->frac_lo
;
1562 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1565  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantissa.
1566 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1567 * and thus MIT licenced.
/* Indexed by the top exponent bit and 6 mantissa bits; see comment above. */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1588 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1589 #define FloatPartsN glue(FloatParts,N)
1590 #define FloatPartsW glue(FloatParts,W)
1595 #include "softfloat-parts-addsub.c.inc"
1596 #include "softfloat-parts.c.inc"
1603 #include "softfloat-parts-addsub.c.inc"
1604 #include "softfloat-parts.c.inc"
1610 #include "softfloat-parts-addsub.c.inc"
1619 * Pack/unpack routines with a specific FloatFmt.
1622 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1623 float_status
*s
, const FloatFmt
*params
)
1625 float16_unpack_raw(p
, f
);
1626 parts_canonicalize(p
, s
, params
);
1629 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1632 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1635 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1638 bfloat16_unpack_raw(p
, f
);
1639 parts_canonicalize(p
, s
, &bfloat16_params
);
1642 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1644 const FloatFmt
*params
)
1646 parts_uncanon(p
, s
, params
);
1647 return float16_pack_raw(p
);
1650 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1653 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1656 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1659 parts_uncanon(p
, s
, &bfloat16_params
);
1660 return bfloat16_pack_raw(p
);
1663 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1666 float32_unpack_raw(p
, f
);
1667 parts_canonicalize(p
, s
, &float32_params
);
1670 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1673 parts_uncanon(p
, s
, &float32_params
);
1674 return float32_pack_raw(p
);
1677 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1680 float64_unpack_raw(p
, f
);
1681 parts_canonicalize(p
, s
, &float64_params
);
1684 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1687 parts_uncanon(p
, s
, &float64_params
);
1688 return float64_pack_raw(p
);
1691 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1694 float128_unpack_raw(p
, f
);
1695 parts_canonicalize(p
, s
, &float128_params
);
1698 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1701 parts_uncanon(p
, s
, &float128_params
);
1702 return float128_pack_raw(p
);
1705 /* Returns false if the encoding is invalid. */
1706 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1709 /* Ensure rounding precision is set before beginning. */
1710 switch (s
->floatx80_rounding_precision
) {
1711 case floatx80_precision_x
:
1712 case floatx80_precision_d
:
1713 case floatx80_precision_s
:
1716 g_assert_not_reached();
1719 if (unlikely(floatx80_invalid_encoding(f
))) {
1720 float_raise(float_flag_invalid
, s
);
1724 floatx80_unpack_raw(p
, f
);
1726 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1727 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1729 /* The explicit integer bit is ignored, after invalid checks. */
1730 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1731 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1732 : parts_is_snan_frac(p
->frac_hi
, s
)
1733 ? float_class_snan
: float_class_qnan
);
1738 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1741 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1746 case float_class_normal
:
1747 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1748 parts_uncanon_normal(p
, s
, fmt
);
1756 frac_truncjam(&p64
, p
);
1757 parts_uncanon_normal(&p64
, s
, fmt
);
1761 if (exp
!= fmt
->exp_max
) {
1764 /* rounded to inf -- fall through to set frac correctly */
1766 case float_class_inf
:
1767 /* x86 and m68k differ in the setting of the integer bit. */
1768 frac
= floatx80_infinity_low
;
1772 case float_class_zero
:
1777 case float_class_snan
:
1778 case float_class_qnan
:
1779 /* NaNs have the integer bit set. */
1780 frac
= p
->frac_hi
| (1ull << 63);
1785 g_assert_not_reached();
1788 return packFloatx80(p
->sign
, exp
, frac
);
1792 * Addition and subtraction
1795 static float16 QEMU_FLATTEN
1796 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1798 FloatParts64 pa
, pb
, *pr
;
1800 float16_unpack_canonical(&pa
, a
, status
);
1801 float16_unpack_canonical(&pb
, b
, status
);
1802 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1804 return float16_round_pack_canonical(pr
, status
);
1807 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1809 return float16_addsub(a
, b
, status
, false);
1812 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1814 return float16_addsub(a
, b
, status
, true);
1817 static float32 QEMU_SOFTFLOAT_ATTR
1818 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1820 FloatParts64 pa
, pb
, *pr
;
1822 float32_unpack_canonical(&pa
, a
, status
);
1823 float32_unpack_canonical(&pb
, b
, status
);
1824 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1826 return float32_round_pack_canonical(pr
, status
);
1829 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1831 return soft_f32_addsub(a
, b
, status
, false);
1834 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1836 return soft_f32_addsub(a
, b
, status
, true);
1839 static float64 QEMU_SOFTFLOAT_ATTR
1840 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1842 FloatParts64 pa
, pb
, *pr
;
1844 float64_unpack_canonical(&pa
, a
, status
);
1845 float64_unpack_canonical(&pb
, b
, status
);
1846 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1848 return float64_round_pack_canonical(pr
, status
);
1851 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1853 return soft_f64_addsub(a
, b
, status
, false);
1856 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1858 return soft_f64_addsub(a
, b
, status
, true);
/* Host-FPU single-precision addition. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}
/* Host-FPU single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    return a - b;
}
/* Host-FPU double-precision addition. */
static double hard_f64_add(double a, double b)
{
    return a + b;
}
/* Host-FPU double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1881 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1883 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1884 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1886 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1889 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1891 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1892 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1894 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1898 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1899 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1901 return float32_gen2(a
, b
, s
, hard
, soft
,
1902 f32_is_zon2
, f32_addsubmul_post
);
1905 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1906 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1908 return float64_gen2(a
, b
, s
, hard
, soft
,
1909 f64_is_zon2
, f64_addsubmul_post
);
1912 float32 QEMU_FLATTEN
1913 float32_add(float32 a
, float32 b
, float_status
*s
)
1915 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1918 float32 QEMU_FLATTEN
1919 float32_sub(float32 a
, float32 b
, float_status
*s
)
1921 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1924 float64 QEMU_FLATTEN
1925 float64_add(float64 a
, float64 b
, float_status
*s
)
1927 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1930 float64 QEMU_FLATTEN
1931 float64_sub(float64 a
, float64 b
, float_status
*s
)
1933 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1936 static bfloat16 QEMU_FLATTEN
1937 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1939 FloatParts64 pa
, pb
, *pr
;
1941 bfloat16_unpack_canonical(&pa
, a
, status
);
1942 bfloat16_unpack_canonical(&pb
, b
, status
);
1943 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1945 return bfloat16_round_pack_canonical(pr
, status
);
1948 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1950 return bfloat16_addsub(a
, b
, status
, false);
1953 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1955 return bfloat16_addsub(a
, b
, status
, true);
1958 static float128 QEMU_FLATTEN
1959 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1961 FloatParts128 pa
, pb
, *pr
;
1963 float128_unpack_canonical(&pa
, a
, status
);
1964 float128_unpack_canonical(&pb
, b
, status
);
1965 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1967 return float128_round_pack_canonical(pr
, status
);
1970 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1972 return float128_addsub(a
, b
, status
, false);
1975 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1977 return float128_addsub(a
, b
, status
, true);
1980 static floatx80 QEMU_FLATTEN
1981 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1983 FloatParts128 pa
, pb
, *pr
;
1985 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1986 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1987 return floatx80_default_nan(status
);
1990 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1991 return floatx80_round_pack_canonical(pr
, status
);
1994 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1996 return floatx80_addsub(a
, b
, status
, false);
1999 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
2001 return floatx80_addsub(a
, b
, status
, true);
2008 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
2010 FloatParts64 pa
, pb
, *pr
;
2012 float16_unpack_canonical(&pa
, a
, status
);
2013 float16_unpack_canonical(&pb
, b
, status
);
2014 pr
= parts_mul(&pa
, &pb
, status
);
2016 return float16_round_pack_canonical(pr
, status
);
2019 static float32 QEMU_SOFTFLOAT_ATTR
2020 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
2022 FloatParts64 pa
, pb
, *pr
;
2024 float32_unpack_canonical(&pa
, a
, status
);
2025 float32_unpack_canonical(&pb
, b
, status
);
2026 pr
= parts_mul(&pa
, &pb
, status
);
2028 return float32_round_pack_canonical(pr
, status
);
2031 static float64 QEMU_SOFTFLOAT_ATTR
2032 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
2034 FloatParts64 pa
, pb
, *pr
;
2036 float64_unpack_canonical(&pa
, a
, status
);
2037 float64_unpack_canonical(&pb
, b
, status
);
2038 pr
= parts_mul(&pa
, &pb
, status
);
2040 return float64_round_pack_canonical(pr
, status
);
/* Host-FPU single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}
/* Host-FPU double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    return a * b;
}
2053 float32 QEMU_FLATTEN
2054 float32_mul(float32 a
, float32 b
, float_status
*s
)
2056 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
2057 f32_is_zon2
, f32_addsubmul_post
);
2060 float64 QEMU_FLATTEN
2061 float64_mul(float64 a
, float64 b
, float_status
*s
)
2063 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
2064 f64_is_zon2
, f64_addsubmul_post
);
2067 bfloat16 QEMU_FLATTEN
2068 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
2070 FloatParts64 pa
, pb
, *pr
;
2072 bfloat16_unpack_canonical(&pa
, a
, status
);
2073 bfloat16_unpack_canonical(&pb
, b
, status
);
2074 pr
= parts_mul(&pa
, &pb
, status
);
2076 return bfloat16_round_pack_canonical(pr
, status
);
2079 float128 QEMU_FLATTEN
2080 float128_mul(float128 a
, float128 b
, float_status
*status
)
2082 FloatParts128 pa
, pb
, *pr
;
2084 float128_unpack_canonical(&pa
, a
, status
);
2085 float128_unpack_canonical(&pb
, b
, status
);
2086 pr
= parts_mul(&pa
, &pb
, status
);
2088 return float128_round_pack_canonical(pr
, status
);
2091 floatx80 QEMU_FLATTEN
2092 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
2094 FloatParts128 pa
, pb
, *pr
;
2096 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2097 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2098 return floatx80_default_nan(status
);
2101 pr
= parts_mul(&pa
, &pb
, status
);
2102 return floatx80_round_pack_canonical(pr
, status
);
2106 * Fused multiply-add
2109 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
2110 int flags
, float_status
*status
)
2112 FloatParts64 pa
, pb
, pc
, *pr
;
2114 float16_unpack_canonical(&pa
, a
, status
);
2115 float16_unpack_canonical(&pb
, b
, status
);
2116 float16_unpack_canonical(&pc
, c
, status
);
2117 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2119 return float16_round_pack_canonical(pr
, status
);
2122 static float32 QEMU_SOFTFLOAT_ATTR
2123 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
2124 float_status
*status
)
2126 FloatParts64 pa
, pb
, pc
, *pr
;
2128 float32_unpack_canonical(&pa
, a
, status
);
2129 float32_unpack_canonical(&pb
, b
, status
);
2130 float32_unpack_canonical(&pc
, c
, status
);
2131 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2133 return float32_round_pack_canonical(pr
, status
);
2136 static float64 QEMU_SOFTFLOAT_ATTR
2137 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
2138 float_status
*status
)
2140 FloatParts64 pa
, pb
, pc
, *pr
;
2142 float64_unpack_canonical(&pa
, a
, status
);
2143 float64_unpack_canonical(&pb
, b
, status
);
2144 float64_unpack_canonical(&pc
, c
, status
);
2145 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2147 return float64_round_pack_canonical(pr
, status
);
2150 static bool force_soft_fma
;
2152 float32 QEMU_FLATTEN
2153 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2155 union_float32 ua
, ub
, uc
, ur
;
2161 if (unlikely(!can_use_fpu(s
))) {
2164 if (unlikely(flags
& float_muladd_halve_result
)) {
2168 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2169 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2173 if (unlikely(force_soft_fma
)) {
2178 * When (a || b) == 0, there's no need to check for under/over flow,
2179 * since we know the addend is (normal || 0) and the product is 0.
2181 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2185 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2186 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2187 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2189 if (flags
& float_muladd_negate_c
) {
2194 union_float32 ua_orig
= ua
;
2195 union_float32 uc_orig
= uc
;
2197 if (flags
& float_muladd_negate_product
) {
2200 if (flags
& float_muladd_negate_c
) {
2204 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2206 if (unlikely(f32_is_inf(ur
))) {
2207 float_raise(float_flag_overflow
, s
);
2208 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2214 if (flags
& float_muladd_negate_result
) {
2215 return float32_chs(ur
.s
);
2220 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2223 float64 QEMU_FLATTEN
2224 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2226 union_float64 ua
, ub
, uc
, ur
;
2232 if (unlikely(!can_use_fpu(s
))) {
2235 if (unlikely(flags
& float_muladd_halve_result
)) {
2239 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2240 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2244 if (unlikely(force_soft_fma
)) {
2249 * When (a || b) == 0, there's no need to check for under/over flow,
2250 * since we know the addend is (normal || 0) and the product is 0.
2252 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2256 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2257 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2258 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2260 if (flags
& float_muladd_negate_c
) {
2265 union_float64 ua_orig
= ua
;
2266 union_float64 uc_orig
= uc
;
2268 if (flags
& float_muladd_negate_product
) {
2271 if (flags
& float_muladd_negate_c
) {
2275 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2277 if (unlikely(f64_is_inf(ur
))) {
2278 float_raise(float_flag_overflow
, s
);
2279 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2285 if (flags
& float_muladd_negate_result
) {
2286 return float64_chs(ur
.s
);
2291 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2294 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2295 int flags
, float_status
*status
)
2297 FloatParts64 pa
, pb
, pc
, *pr
;
2299 bfloat16_unpack_canonical(&pa
, a
, status
);
2300 bfloat16_unpack_canonical(&pb
, b
, status
);
2301 bfloat16_unpack_canonical(&pc
, c
, status
);
2302 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2304 return bfloat16_round_pack_canonical(pr
, status
);
2307 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2308 int flags
, float_status
*status
)
2310 FloatParts128 pa
, pb
, pc
, *pr
;
2312 float128_unpack_canonical(&pa
, a
, status
);
2313 float128_unpack_canonical(&pb
, b
, status
);
2314 float128_unpack_canonical(&pc
, c
, status
);
2315 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2317 return float128_round_pack_canonical(pr
, status
);
2324 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2326 FloatParts64 pa
, pb
, *pr
;
2328 float16_unpack_canonical(&pa
, a
, status
);
2329 float16_unpack_canonical(&pb
, b
, status
);
2330 pr
= parts_div(&pa
, &pb
, status
);
2332 return float16_round_pack_canonical(pr
, status
);
2335 static float32 QEMU_SOFTFLOAT_ATTR
2336 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2338 FloatParts64 pa
, pb
, *pr
;
2340 float32_unpack_canonical(&pa
, a
, status
);
2341 float32_unpack_canonical(&pb
, b
, status
);
2342 pr
= parts_div(&pa
, &pb
, status
);
2344 return float32_round_pack_canonical(pr
, status
);
2347 static float64 QEMU_SOFTFLOAT_ATTR
2348 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2350 FloatParts64 pa
, pb
, *pr
;
2352 float64_unpack_canonical(&pa
, a
, status
);
2353 float64_unpack_canonical(&pb
, b
, status
);
2354 pr
= parts_div(&pa
, &pb
, status
);
2356 return float64_round_pack_canonical(pr
, status
);
/* Host-FPU single-precision division. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
/* Host-FPU double-precision division. */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
2369 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2371 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2372 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2373 fpclassify(b
.h
) == FP_NORMAL
;
2375 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2378 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2380 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2381 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2382 fpclassify(b
.h
) == FP_NORMAL
;
2384 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2387 static bool f32_div_post(union_float32 a
, union_float32 b
)
2389 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2390 return fpclassify(a
.h
) != FP_ZERO
;
2392 return !float32_is_zero(a
.s
);
2395 static bool f64_div_post(union_float64 a
, union_float64 b
)
2397 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2398 return fpclassify(a
.h
) != FP_ZERO
;
2400 return !float64_is_zero(a
.s
);
2403 float32 QEMU_FLATTEN
2404 float32_div(float32 a
, float32 b
, float_status
*s
)
2406 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2407 f32_div_pre
, f32_div_post
);
2410 float64 QEMU_FLATTEN
2411 float64_div(float64 a
, float64 b
, float_status
*s
)
2413 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2414 f64_div_pre
, f64_div_post
);
2417 bfloat16 QEMU_FLATTEN
2418 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2420 FloatParts64 pa
, pb
, *pr
;
2422 bfloat16_unpack_canonical(&pa
, a
, status
);
2423 bfloat16_unpack_canonical(&pb
, b
, status
);
2424 pr
= parts_div(&pa
, &pb
, status
);
2426 return bfloat16_round_pack_canonical(pr
, status
);
2429 float128 QEMU_FLATTEN
2430 float128_div(float128 a
, float128 b
, float_status
*status
)
2432 FloatParts128 pa
, pb
, *pr
;
2434 float128_unpack_canonical(&pa
, a
, status
);
2435 float128_unpack_canonical(&pb
, b
, status
);
2436 pr
= parts_div(&pa
, &pb
, status
);
2438 return float128_round_pack_canonical(pr
, status
);
2441 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
2443 FloatParts128 pa
, pb
, *pr
;
2445 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2446 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2447 return floatx80_default_nan(status
);
2450 pr
= parts_div(&pa
, &pb
, status
);
2451 return floatx80_round_pack_canonical(pr
, status
);
2458 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
2460 FloatParts64 pa
, pb
, *pr
;
2462 float32_unpack_canonical(&pa
, a
, status
);
2463 float32_unpack_canonical(&pb
, b
, status
);
2464 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2466 return float32_round_pack_canonical(pr
, status
);
2469 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
2471 FloatParts64 pa
, pb
, *pr
;
2473 float64_unpack_canonical(&pa
, a
, status
);
2474 float64_unpack_canonical(&pb
, b
, status
);
2475 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2477 return float64_round_pack_canonical(pr
, status
);
2480 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
2482 FloatParts128 pa
, pb
, *pr
;
2484 float128_unpack_canonical(&pa
, a
, status
);
2485 float128_unpack_canonical(&pb
, b
, status
);
2486 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2488 return float128_round_pack_canonical(pr
, status
);
2492 * Returns the remainder of the extended double-precision floating-point value
2493 * `a' with respect to the corresponding value `b'.
2494 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2495 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2496 * the remainder based on truncating the quotient toward zero instead and
2497  * *quotient is set to the low 64 bits of the absolute value of the integer quotient.
2500 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
,
2501 uint64_t *quotient
, float_status
*status
)
2503 FloatParts128 pa
, pb
, *pr
;
2506 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2507 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2508 return floatx80_default_nan(status
);
2510 pr
= parts_modrem(&pa
, &pb
, mod
? quotient
: NULL
, status
);
2512 return floatx80_round_pack_canonical(pr
, status
);
2515 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
2518 return floatx80_modrem(a
, b
, false, "ient
, status
);
2521 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
2524 return floatx80_modrem(a
, b
, true, "ient
, status
);
2528 * Float to Float conversions
2530 * Returns the result of converting one float format to another. The
2531 * conversion is performed according to the IEC/IEEE Standard for
2532 * Binary Floating-Point Arithmetic.
2534 * Usually this only needs to take care of raising invalid exceptions
2535 * and handling the conversion on NaNs.
2538 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2541 case float_class_qnan
:
2542 case float_class_snan
:
2544 * There is no NaN in the destination format. Raise Invalid
2545 * and return a zero with the sign of the input NaN.
2547 float_raise(float_flag_invalid
, s
);
2548 a
->cls
= float_class_zero
;
2551 case float_class_inf
:
2553 * There is no Inf in the destination format. Raise Invalid
2554 * and return the maximum normal with the correct sign.
2556 float_raise(float_flag_invalid
, s
);
2557 a
->cls
= float_class_normal
;
2558 a
->exp
= float16_params_ahp
.exp_max
;
2559 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2560 float16_params_ahp
.frac_size
+ 1);
2563 case float_class_normal
:
2564 case float_class_zero
:
2568 g_assert_not_reached();
2572 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2574 if (is_nan(a
->cls
)) {
2575 parts_return_nan(a
, s
);
2579 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2581 if (is_nan(a
->cls
)) {
2582 parts_return_nan(a
, s
);
2586 #define parts_float_to_float(P, S) \
2587 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2589 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2596 if (a
->cls
== float_class_normal
) {
2597 frac_truncjam(a
, b
);
2598 } else if (is_nan(a
->cls
)) {
2599 /* Discard the low bits of the NaN. */
2600 a
->frac
= b
->frac_hi
;
2601 parts_return_nan(a
, s
);
2605 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2613 if (is_nan(a
->cls
)) {
2614 parts_return_nan(a
, s
);
2618 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2620 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2623 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2624 parts_float_to_float(&p
, s
);
2625 return float32_round_pack_canonical(&p
, s
);
2628 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2630 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2633 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2634 parts_float_to_float(&p
, s
);
2635 return float64_round_pack_canonical(&p
, s
);
2638 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2641 const FloatFmt
*fmt
;
2643 float32_unpack_canonical(&p
, a
, s
);
2645 parts_float_to_float(&p
, s
);
2646 fmt
= &float16_params
;
2648 parts_float_to_ahp(&p
, s
);
2649 fmt
= &float16_params_ahp
;
2651 return float16a_round_pack_canonical(&p
, s
, fmt
);
2654 static float64 QEMU_SOFTFLOAT_ATTR
2655 soft_float32_to_float64(float32 a
, float_status
*s
)
2659 float32_unpack_canonical(&p
, a
, s
);
2660 parts_float_to_float(&p
, s
);
2661 return float64_round_pack_canonical(&p
, s
);
2664 float64
float32_to_float64(float32 a
, float_status
*s
)
2666 if (likely(float32_is_normal(a
))) {
2667 /* Widening conversion can never produce inexact results. */
2673 } else if (float32_is_zero(a
)) {
2674 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2676 return soft_float32_to_float64(a
, s
);
2680 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2683 const FloatFmt
*fmt
;
2685 float64_unpack_canonical(&p
, a
, s
);
2687 parts_float_to_float(&p
, s
);
2688 fmt
= &float16_params
;
2690 parts_float_to_ahp(&p
, s
);
2691 fmt
= &float16_params_ahp
;
2693 return float16a_round_pack_canonical(&p
, s
, fmt
);
2696 float32
float64_to_float32(float64 a
, float_status
*s
)
2700 float64_unpack_canonical(&p
, a
, s
);
2701 parts_float_to_float(&p
, s
);
2702 return float32_round_pack_canonical(&p
, s
);
2705 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2709 bfloat16_unpack_canonical(&p
, a
, s
);
2710 parts_float_to_float(&p
, s
);
2711 return float32_round_pack_canonical(&p
, s
);
2714 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2718 bfloat16_unpack_canonical(&p
, a
, s
);
2719 parts_float_to_float(&p
, s
);
2720 return float64_round_pack_canonical(&p
, s
);
2723 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2727 float32_unpack_canonical(&p
, a
, s
);
2728 parts_float_to_float(&p
, s
);
2729 return bfloat16_round_pack_canonical(&p
, s
);
2732 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2736 float64_unpack_canonical(&p
, a
, s
);
2737 parts_float_to_float(&p
, s
);
2738 return bfloat16_round_pack_canonical(&p
, s
);
2741 float32
float128_to_float32(float128 a
, float_status
*s
)
2746 float128_unpack_canonical(&p128
, a
, s
);
2747 parts_float_to_float_narrow(&p64
, &p128
, s
);
2748 return float32_round_pack_canonical(&p64
, s
);
2751 float64
float128_to_float64(float128 a
, float_status
*s
)
2756 float128_unpack_canonical(&p128
, a
, s
);
2757 parts_float_to_float_narrow(&p64
, &p128
, s
);
2758 return float64_round_pack_canonical(&p64
, s
);
2761 float128
float32_to_float128(float32 a
, float_status
*s
)
2766 float32_unpack_canonical(&p64
, a
, s
);
2767 parts_float_to_float_widen(&p128
, &p64
, s
);
2768 return float128_round_pack_canonical(&p128
, s
);
2771 float128
float64_to_float128(float64 a
, float_status
*s
)
2776 float64_unpack_canonical(&p64
, a
, s
);
2777 parts_float_to_float_widen(&p128
, &p64
, s
);
2778 return float128_round_pack_canonical(&p128
, s
);
2781 float32
floatx80_to_float32(floatx80 a
, float_status
*s
)
2786 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2787 parts_float_to_float_narrow(&p64
, &p128
, s
);
2789 parts_default_nan(&p64
, s
);
2791 return float32_round_pack_canonical(&p64
, s
);
2794 float64
floatx80_to_float64(floatx80 a
, float_status
*s
)
2799 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2800 parts_float_to_float_narrow(&p64
, &p128
, s
);
2802 parts_default_nan(&p64
, s
);
2804 return float64_round_pack_canonical(&p64
, s
);
2807 float128
floatx80_to_float128(floatx80 a
, float_status
*s
)
2811 if (floatx80_unpack_canonical(&p
, a
, s
)) {
2812 parts_float_to_float(&p
, s
);
2814 parts_default_nan(&p
, s
);
2816 return float128_round_pack_canonical(&p
, s
);
2819 floatx80
float32_to_floatx80(float32 a
, float_status
*s
)
2824 float32_unpack_canonical(&p64
, a
, s
);
2825 parts_float_to_float_widen(&p128
, &p64
, s
);
2826 return floatx80_round_pack_canonical(&p128
, s
);
2829 floatx80
float64_to_floatx80(float64 a
, float_status
*s
)
2834 float64_unpack_canonical(&p64
, a
, s
);
2835 parts_float_to_float_widen(&p128
, &p64
, s
);
2836 return floatx80_round_pack_canonical(&p128
, s
);
2839 floatx80
float128_to_floatx80(float128 a
, float_status
*s
)
2843 float128_unpack_canonical(&p
, a
, s
);
2844 parts_float_to_float(&p
, s
);
2845 return floatx80_round_pack_canonical(&p
, s
);
2849 * Round to integral value
2852 float16
float16_round_to_int(float16 a
, float_status
*s
)
2856 float16_unpack_canonical(&p
, a
, s
);
2857 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2858 return float16_round_pack_canonical(&p
, s
);
2861 float32
float32_round_to_int(float32 a
, float_status
*s
)
2865 float32_unpack_canonical(&p
, a
, s
);
2866 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2867 return float32_round_pack_canonical(&p
, s
);
2870 float64
float64_round_to_int(float64 a
, float_status
*s
)
2874 float64_unpack_canonical(&p
, a
, s
);
2875 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2876 return float64_round_pack_canonical(&p
, s
);
2879 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2883 bfloat16_unpack_canonical(&p
, a
, s
);
2884 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2885 return bfloat16_round_pack_canonical(&p
, s
);
2888 float128
float128_round_to_int(float128 a
, float_status
*s
)
2892 float128_unpack_canonical(&p
, a
, s
);
2893 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2894 return float128_round_pack_canonical(&p
, s
);
2897 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
2901 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
2902 return floatx80_default_nan(status
);
2905 parts_round_to_int(&p
, status
->float_rounding_mode
, 0, status
,
2906 &floatx80_params
[status
->floatx80_rounding_precision
]);
2907 return floatx80_round_pack_canonical(&p
, status
);
2911 * Floating-point to signed integer conversions
2914 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2919 float16_unpack_canonical(&p
, a
, s
);
2920 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2923 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2928 float16_unpack_canonical(&p
, a
, s
);
2929 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2932 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2937 float16_unpack_canonical(&p
, a
, s
);
2938 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2941 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2946 float16_unpack_canonical(&p
, a
, s
);
2947 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2950 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2955 float32_unpack_canonical(&p
, a
, s
);
2956 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2959 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2964 float32_unpack_canonical(&p
, a
, s
);
2965 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2968 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2973 float32_unpack_canonical(&p
, a
, s
);
2974 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2977 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2982 float64_unpack_canonical(&p
, a
, s
);
2983 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2986 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2991 float64_unpack_canonical(&p
, a
, s
);
2992 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2995 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3000 float64_unpack_canonical(&p
, a
, s
);
3001 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3004 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3009 bfloat16_unpack_canonical(&p
, a
, s
);
3010 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3013 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3018 bfloat16_unpack_canonical(&p
, a
, s
);
3019 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3022 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3027 bfloat16_unpack_canonical(&p
, a
, s
);
3028 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3031 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
3032 int scale
, float_status
*s
)
3036 float128_unpack_canonical(&p
, a
, s
);
3037 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3040 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
3041 int scale
, float_status
*s
)
3045 float128_unpack_canonical(&p
, a
, s
);
3046 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3049 static int32_t floatx80_to_int32_scalbn(floatx80 a
, FloatRoundMode rmode
,
3050 int scale
, float_status
*s
)
3054 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3055 parts_default_nan(&p
, s
);
3057 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3060 static int64_t floatx80_to_int64_scalbn(floatx80 a
, FloatRoundMode rmode
,
3061 int scale
, float_status
*s
)
3065 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3066 parts_default_nan(&p
, s
);
3068 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3071 int8_t float16_to_int8(float16 a
, float_status
*s
)
3073 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3076 int16_t float16_to_int16(float16 a
, float_status
*s
)
3078 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3081 int32_t float16_to_int32(float16 a
, float_status
*s
)
3083 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3086 int64_t float16_to_int64(float16 a
, float_status
*s
)
3088 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3091 int16_t float32_to_int16(float32 a
, float_status
*s
)
3093 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3096 int32_t float32_to_int32(float32 a
, float_status
*s
)
3098 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3101 int64_t float32_to_int64(float32 a
, float_status
*s
)
3103 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3106 int16_t float64_to_int16(float64 a
, float_status
*s
)
3108 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3111 int32_t float64_to_int32(float64 a
, float_status
*s
)
3113 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3116 int64_t float64_to_int64(float64 a
, float_status
*s
)
3118 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3121 int32_t float128_to_int32(float128 a
, float_status
*s
)
3123 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3126 int64_t float128_to_int64(float128 a
, float_status
*s
)
3128 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3131 int32_t floatx80_to_int32(floatx80 a
, float_status
*s
)
3133 return floatx80_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3136 int64_t floatx80_to_int64(floatx80 a
, float_status
*s
)
3138 return floatx80_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3141 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
3143 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3146 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
3148 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3151 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
3153 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3156 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
3158 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3161 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
3163 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3166 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
3168 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3171 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
3173 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3176 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
3178 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3181 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
3183 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3186 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
3188 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3191 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
3193 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3196 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*s
)
3198 return floatx80_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3201 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*s
)
3203 return floatx80_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3206 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
3208 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3211 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
3213 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3216 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
3218 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3221 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
3223 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3226 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
3228 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3231 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
3233 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3237 * Floating-point to unsigned integer conversions
3240 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3245 float16_unpack_canonical(&p
, a
, s
);
3246 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
3249 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3254 float16_unpack_canonical(&p
, a
, s
);
3255 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3258 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3263 float16_unpack_canonical(&p
, a
, s
);
3264 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3267 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3272 float16_unpack_canonical(&p
, a
, s
);
3273 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3276 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3281 float32_unpack_canonical(&p
, a
, s
);
3282 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3285 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3290 float32_unpack_canonical(&p
, a
, s
);
3291 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3294 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3299 float32_unpack_canonical(&p
, a
, s
);
3300 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3303 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3308 float64_unpack_canonical(&p
, a
, s
);
3309 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3312 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3317 float64_unpack_canonical(&p
, a
, s
);
3318 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3321 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3326 float64_unpack_canonical(&p
, a
, s
);
3327 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3330 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3331 int scale
, float_status
*s
)
3335 bfloat16_unpack_canonical(&p
, a
, s
);
3336 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3339 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3340 int scale
, float_status
*s
)
3344 bfloat16_unpack_canonical(&p
, a
, s
);
3345 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3348 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3349 int scale
, float_status
*s
)
3353 bfloat16_unpack_canonical(&p
, a
, s
);
3354 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3357 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3358 int scale
, float_status
*s
)
3362 float128_unpack_canonical(&p
, a
, s
);
3363 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3366 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3367 int scale
, float_status
*s
)
3371 float128_unpack_canonical(&p
, a
, s
);
3372 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3375 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3377 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3380 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3382 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3385 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3387 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3390 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3392 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3395 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3397 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3400 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3402 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3405 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3407 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3410 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3412 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3415 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3417 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3420 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3422 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3425 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3427 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3430 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3432 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3435 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3437 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3440 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3442 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3445 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3447 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3450 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3452 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3455 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3457 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3460 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3462 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3465 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3467 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3470 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3472 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3475 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3477 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3480 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3482 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3485 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3487 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3490 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3492 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3495 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3497 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3500 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3502 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3505 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3507 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3510 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3512 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3515 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3517 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3521 * Signed integer to floating-point conversions
3524 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3528 parts_sint_to_float(&p
, a
, scale
, status
);
3529 return float16_round_pack_canonical(&p
, status
);
3532 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3534 return int64_to_float16_scalbn(a
, scale
, status
);
3537 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3539 return int64_to_float16_scalbn(a
, scale
, status
);
3542 float16
int64_to_float16(int64_t a
, float_status
*status
)
3544 return int64_to_float16_scalbn(a
, 0, status
);
3547 float16
int32_to_float16(int32_t a
, float_status
*status
)
3549 return int64_to_float16_scalbn(a
, 0, status
);
3552 float16
int16_to_float16(int16_t a
, float_status
*status
)
3554 return int64_to_float16_scalbn(a
, 0, status
);
3557 float16
int8_to_float16(int8_t a
, float_status
*status
)
3559 return int64_to_float16_scalbn(a
, 0, status
);
3562 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3566 /* Without scaling, there are no overflow concerns. */
3567 if (likely(scale
== 0) && can_use_fpu(status
)) {
3573 parts64_sint_to_float(&p
, a
, scale
, status
);
3574 return float32_round_pack_canonical(&p
, status
);
3577 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3579 return int64_to_float32_scalbn(a
, scale
, status
);
3582 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3584 return int64_to_float32_scalbn(a
, scale
, status
);
3587 float32
int64_to_float32(int64_t a
, float_status
*status
)
3589 return int64_to_float32_scalbn(a
, 0, status
);
3592 float32
int32_to_float32(int32_t a
, float_status
*status
)
3594 return int64_to_float32_scalbn(a
, 0, status
);
3597 float32
int16_to_float32(int16_t a
, float_status
*status
)
3599 return int64_to_float32_scalbn(a
, 0, status
);
3602 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3606 /* Without scaling, there are no overflow concerns. */
3607 if (likely(scale
== 0) && can_use_fpu(status
)) {
3613 parts_sint_to_float(&p
, a
, scale
, status
);
3614 return float64_round_pack_canonical(&p
, status
);
3617 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3619 return int64_to_float64_scalbn(a
, scale
, status
);
3622 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3624 return int64_to_float64_scalbn(a
, scale
, status
);
3627 float64
int64_to_float64(int64_t a
, float_status
*status
)
3629 return int64_to_float64_scalbn(a
, 0, status
);
3632 float64
int32_to_float64(int32_t a
, float_status
*status
)
3634 return int64_to_float64_scalbn(a
, 0, status
);
3637 float64
int16_to_float64(int16_t a
, float_status
*status
)
3639 return int64_to_float64_scalbn(a
, 0, status
);
3642 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3646 parts_sint_to_float(&p
, a
, scale
, status
);
3647 return bfloat16_round_pack_canonical(&p
, status
);
3650 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3652 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3655 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3657 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3660 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3662 return int64_to_bfloat16_scalbn(a
, 0, status
);
3665 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3667 return int64_to_bfloat16_scalbn(a
, 0, status
);
3670 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3672 return int64_to_bfloat16_scalbn(a
, 0, status
);
3675 float128
int64_to_float128(int64_t a
, float_status
*status
)
3679 parts_sint_to_float(&p
, a
, 0, status
);
3680 return float128_round_pack_canonical(&p
, status
);
3683 float128
int32_to_float128(int32_t a
, float_status
*status
)
3685 return int64_to_float128(a
, status
);
3688 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
3692 parts_sint_to_float(&p
, a
, 0, status
);
3693 return floatx80_round_pack_canonical(&p
, status
);
3696 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
3698 return int64_to_floatx80(a
, status
);
3702 * Unsigned Integer to floating-point conversions
3705 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3709 parts_uint_to_float(&p
, a
, scale
, status
);
3710 return float16_round_pack_canonical(&p
, status
);
3713 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3715 return uint64_to_float16_scalbn(a
, scale
, status
);
3718 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3720 return uint64_to_float16_scalbn(a
, scale
, status
);
3723 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3725 return uint64_to_float16_scalbn(a
, 0, status
);
3728 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3730 return uint64_to_float16_scalbn(a
, 0, status
);
3733 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3735 return uint64_to_float16_scalbn(a
, 0, status
);
3738 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3740 return uint64_to_float16_scalbn(a
, 0, status
);
3743 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3747 /* Without scaling, there are no overflow concerns. */
3748 if (likely(scale
== 0) && can_use_fpu(status
)) {
3754 parts_uint_to_float(&p
, a
, scale
, status
);
3755 return float32_round_pack_canonical(&p
, status
);
3758 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3760 return uint64_to_float32_scalbn(a
, scale
, status
);
3763 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3765 return uint64_to_float32_scalbn(a
, scale
, status
);
3768 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3770 return uint64_to_float32_scalbn(a
, 0, status
);
3773 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3775 return uint64_to_float32_scalbn(a
, 0, status
);
3778 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3780 return uint64_to_float32_scalbn(a
, 0, status
);
3783 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3787 /* Without scaling, there are no overflow concerns. */
3788 if (likely(scale
== 0) && can_use_fpu(status
)) {
3794 parts_uint_to_float(&p
, a
, scale
, status
);
3795 return float64_round_pack_canonical(&p
, status
);
3798 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3800 return uint64_to_float64_scalbn(a
, scale
, status
);
3803 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3805 return uint64_to_float64_scalbn(a
, scale
, status
);
3808 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3810 return uint64_to_float64_scalbn(a
, 0, status
);
3813 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3815 return uint64_to_float64_scalbn(a
, 0, status
);
3818 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3820 return uint64_to_float64_scalbn(a
, 0, status
);
3823 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3827 parts_uint_to_float(&p
, a
, scale
, status
);
3828 return bfloat16_round_pack_canonical(&p
, status
);
3831 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3833 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3836 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3838 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3841 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3843 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3846 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3848 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3851 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3853 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3856 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3860 parts_uint_to_float(&p
, a
, 0, status
);
3861 return float128_round_pack_canonical(&p
, status
);
3865 * Minimum and maximum
3868 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3870 FloatParts64 pa
, pb
, *pr
;
3872 float16_unpack_canonical(&pa
, a
, s
);
3873 float16_unpack_canonical(&pb
, b
, s
);
3874 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3876 return float16_round_pack_canonical(pr
, s
);
3879 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3880 float_status
*s
, int flags
)
3882 FloatParts64 pa
, pb
, *pr
;
3884 bfloat16_unpack_canonical(&pa
, a
, s
);
3885 bfloat16_unpack_canonical(&pb
, b
, s
);
3886 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3888 return bfloat16_round_pack_canonical(pr
, s
);
3891 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3893 FloatParts64 pa
, pb
, *pr
;
3895 float32_unpack_canonical(&pa
, a
, s
);
3896 float32_unpack_canonical(&pb
, b
, s
);
3897 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3899 return float32_round_pack_canonical(pr
, s
);
3902 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3904 FloatParts64 pa
, pb
, *pr
;
3906 float64_unpack_canonical(&pa
, a
, s
);
3907 float64_unpack_canonical(&pb
, b
, s
);
3908 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3910 return float64_round_pack_canonical(pr
, s
);
3913 static float128
float128_minmax(float128 a
, float128 b
,
3914 float_status
*s
, int flags
)
3916 FloatParts128 pa
, pb
, *pr
;
3918 float128_unpack_canonical(&pa
, a
, s
);
3919 float128_unpack_canonical(&pb
, b
, s
);
3920 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3922 return float128_round_pack_canonical(pr
, s
);
/*
 * Stamp out the public min/max entry points for a given float type:
 * MINMAX_1 defines one wrapper around <type>_minmax with the given
 * flag combination; MINMAX_2 expands to the six standard variants
 * (max, maxnum, maxnummag, min, minnum, minnummag).
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0) \
    MINMAX_1(type, maxnum, minmax_isnum) \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
    MINMAX_1(type, min, minmax_ismin) \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3947 * Floating point compare
3950 static FloatRelation QEMU_FLATTEN
3951 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3953 FloatParts64 pa
, pb
;
3955 float16_unpack_canonical(&pa
, a
, s
);
3956 float16_unpack_canonical(&pb
, b
, s
);
3957 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3960 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3962 return float16_do_compare(a
, b
, s
, false);
3965 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3967 return float16_do_compare(a
, b
, s
, true);
3970 static FloatRelation QEMU_SOFTFLOAT_ATTR
3971 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3973 FloatParts64 pa
, pb
;
3975 float32_unpack_canonical(&pa
, a
, s
);
3976 float32_unpack_canonical(&pb
, b
, s
);
3977 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3980 static FloatRelation QEMU_FLATTEN
3981 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3983 union_float32 ua
, ub
;
3988 if (QEMU_NO_HARDFLOAT
) {
3992 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3993 if (isgreaterequal(ua
.h
, ub
.h
)) {
3994 if (isgreater(ua
.h
, ub
.h
)) {
3995 return float_relation_greater
;
3997 return float_relation_equal
;
3999 if (likely(isless(ua
.h
, ub
.h
))) {
4000 return float_relation_less
;
4003 * The only condition remaining is unordered.
4004 * Fall through to set flags.
4007 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4010 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
4012 return float32_hs_compare(a
, b
, s
, false);
4015 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
4017 return float32_hs_compare(a
, b
, s
, true);
4020 static FloatRelation QEMU_SOFTFLOAT_ATTR
4021 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
4023 FloatParts64 pa
, pb
;
4025 float64_unpack_canonical(&pa
, a
, s
);
4026 float64_unpack_canonical(&pb
, b
, s
);
4027 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4030 static FloatRelation QEMU_FLATTEN
4031 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
4033 union_float64 ua
, ub
;
4038 if (QEMU_NO_HARDFLOAT
) {
4042 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
4043 if (isgreaterequal(ua
.h
, ub
.h
)) {
4044 if (isgreater(ua
.h
, ub
.h
)) {
4045 return float_relation_greater
;
4047 return float_relation_equal
;
4049 if (likely(isless(ua
.h
, ub
.h
))) {
4050 return float_relation_less
;
4053 * The only condition remaining is unordered.
4054 * Fall through to set flags.
4057 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4060 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
4062 return float64_hs_compare(a
, b
, s
, false);
4065 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
4067 return float64_hs_compare(a
, b
, s
, true);
4070 static FloatRelation QEMU_FLATTEN
4071 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
4073 FloatParts64 pa
, pb
;
4075 bfloat16_unpack_canonical(&pa
, a
, s
);
4076 bfloat16_unpack_canonical(&pb
, b
, s
);
4077 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4080 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
4082 return bfloat16_do_compare(a
, b
, s
, false);
4085 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
4087 return bfloat16_do_compare(a
, b
, s
, true);
4090 static FloatRelation QEMU_FLATTEN
4091 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
4093 FloatParts128 pa
, pb
;
4095 float128_unpack_canonical(&pa
, a
, s
);
4096 float128_unpack_canonical(&pb
, b
, s
);
4097 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4100 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
4102 return float128_do_compare(a
, b
, s
, false);
4105 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
4107 return float128_do_compare(a
, b
, s
, true);
4110 static FloatRelation QEMU_FLATTEN
4111 floatx80_do_compare(floatx80 a
, floatx80 b
, float_status
*s
, bool is_quiet
)
4113 FloatParts128 pa
, pb
;
4115 if (!floatx80_unpack_canonical(&pa
, a
, s
) ||
4116 !floatx80_unpack_canonical(&pb
, b
, s
)) {
4117 return float_relation_unordered
;
4119 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4122 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*s
)
4124 return floatx80_do_compare(a
, b
, s
, false);
4127 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
, float_status
*s
)
4129 return floatx80_do_compare(a
, b
, s
, true);
4136 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
4140 float16_unpack_canonical(&p
, a
, status
);
4141 parts_scalbn(&p
, n
, status
);
4142 return float16_round_pack_canonical(&p
, status
);
4145 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
4149 float32_unpack_canonical(&p
, a
, status
);
4150 parts_scalbn(&p
, n
, status
);
4151 return float32_round_pack_canonical(&p
, status
);
4154 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
4158 float64_unpack_canonical(&p
, a
, status
);
4159 parts_scalbn(&p
, n
, status
);
4160 return float64_round_pack_canonical(&p
, status
);
4163 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
4167 bfloat16_unpack_canonical(&p
, a
, status
);
4168 parts_scalbn(&p
, n
, status
);
4169 return bfloat16_round_pack_canonical(&p
, status
);
4172 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
4176 float128_unpack_canonical(&p
, a
, status
);
4177 parts_scalbn(&p
, n
, status
);
4178 return float128_round_pack_canonical(&p
, status
);
4181 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
4185 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
4186 return floatx80_default_nan(status
);
4188 parts_scalbn(&p
, n
, status
);
4189 return floatx80_round_pack_canonical(&p
, status
);
4196 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
4200 float16_unpack_canonical(&p
, a
, status
);
4201 parts_sqrt(&p
, status
, &float16_params
);
4202 return float16_round_pack_canonical(&p
, status
);
4205 static float32 QEMU_SOFTFLOAT_ATTR
4206 soft_f32_sqrt(float32 a
, float_status
*status
)
4210 float32_unpack_canonical(&p
, a
, status
);
4211 parts_sqrt(&p
, status
, &float32_params
);
4212 return float32_round_pack_canonical(&p
, status
);
4215 static float64 QEMU_SOFTFLOAT_ATTR
4216 soft_f64_sqrt(float64 a
, float_status
*status
)
4220 float64_unpack_canonical(&p
, a
, status
);
4221 parts_sqrt(&p
, status
, &float64_params
);
4222 return float64_round_pack_canonical(&p
, status
);
4225 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
4227 union_float32 ua
, ur
;
4230 if (unlikely(!can_use_fpu(s
))) {
4234 float32_input_flush1(&ua
.s
, s
);
4235 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
4236 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
4237 fpclassify(ua
.h
) == FP_ZERO
) ||
4241 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
4242 float32_is_neg(ua
.s
))) {
4249 return soft_f32_sqrt(ua
.s
, s
);
4252 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
4254 union_float64 ua
, ur
;
4257 if (unlikely(!can_use_fpu(s
))) {
4261 float64_input_flush1(&ua
.s
, s
);
4262 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
4263 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
4264 fpclassify(ua
.h
) == FP_ZERO
) ||
4268 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
4269 float64_is_neg(ua
.s
))) {
4276 return soft_f64_sqrt(ua
.s
, s
);
4279 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
4283 bfloat16_unpack_canonical(&p
, a
, status
);
4284 parts_sqrt(&p
, status
, &bfloat16_params
);
4285 return bfloat16_round_pack_canonical(&p
, status
);
4288 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
4292 float128_unpack_canonical(&p
, a
, status
);
4293 parts_sqrt(&p
, status
, &float128_params
);
4294 return float128_round_pack_canonical(&p
, status
);
4297 floatx80
floatx80_sqrt(floatx80 a
, float_status
*s
)
4301 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
4302 return floatx80_default_nan(s
);
4304 parts_sqrt(&p
, s
, &floatx80_params
[s
->floatx80_rounding_precision
]);
4305 return floatx80_round_pack_canonical(&p
, s
);
4311 float32
float32_log2(float32 a
, float_status
*status
)
4315 float32_unpack_canonical(&p
, a
, status
);
4316 parts_log2(&p
, status
, &float32_params
);
4317 return float32_round_pack_canonical(&p
, status
);
4320 float64
float64_log2(float64 a
, float_status
*status
)
4324 float64_unpack_canonical(&p
, a
, status
);
4325 parts_log2(&p
, status
, &float64_params
);
4326 return float64_round_pack_canonical(&p
, status
);
4329 /*----------------------------------------------------------------------------
4330 | The pattern for a default generated NaN.
4331 *----------------------------------------------------------------------------*/
4333 float16
float16_default_nan(float_status
*status
)
4337 parts_default_nan(&p
, status
);
4338 p
.frac
>>= float16_params
.frac_shift
;
4339 return float16_pack_raw(&p
);
4342 float32
float32_default_nan(float_status
*status
)
4346 parts_default_nan(&p
, status
);
4347 p
.frac
>>= float32_params
.frac_shift
;
4348 return float32_pack_raw(&p
);
4351 float64
float64_default_nan(float_status
*status
)
4355 parts_default_nan(&p
, status
);
4356 p
.frac
>>= float64_params
.frac_shift
;
4357 return float64_pack_raw(&p
);
4360 float128
float128_default_nan(float_status
*status
)
4364 parts_default_nan(&p
, status
);
4365 frac_shr(&p
, float128_params
.frac_shift
);
4366 return float128_pack_raw(&p
);
4369 bfloat16
bfloat16_default_nan(float_status
*status
)
4373 parts_default_nan(&p
, status
);
4374 p
.frac
>>= bfloat16_params
.frac_shift
;
4375 return bfloat16_pack_raw(&p
);
4378 /*----------------------------------------------------------------------------
4379 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4380 *----------------------------------------------------------------------------*/
4382 float16
float16_silence_nan(float16 a
, float_status
*status
)
4386 float16_unpack_raw(&p
, a
);
4387 p
.frac
<<= float16_params
.frac_shift
;
4388 parts_silence_nan(&p
, status
);
4389 p
.frac
>>= float16_params
.frac_shift
;
4390 return float16_pack_raw(&p
);
4393 float32
float32_silence_nan(float32 a
, float_status
*status
)
4397 float32_unpack_raw(&p
, a
);
4398 p
.frac
<<= float32_params
.frac_shift
;
4399 parts_silence_nan(&p
, status
);
4400 p
.frac
>>= float32_params
.frac_shift
;
4401 return float32_pack_raw(&p
);
4404 float64
float64_silence_nan(float64 a
, float_status
*status
)
4408 float64_unpack_raw(&p
, a
);
4409 p
.frac
<<= float64_params
.frac_shift
;
4410 parts_silence_nan(&p
, status
);
4411 p
.frac
>>= float64_params
.frac_shift
;
4412 return float64_pack_raw(&p
);
4415 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
4419 bfloat16_unpack_raw(&p
, a
);
4420 p
.frac
<<= bfloat16_params
.frac_shift
;
4421 parts_silence_nan(&p
, status
);
4422 p
.frac
>>= bfloat16_params
.frac_shift
;
4423 return bfloat16_pack_raw(&p
);
4426 float128
float128_silence_nan(float128 a
, float_status
*status
)
4430 float128_unpack_raw(&p
, a
);
4431 frac_shl(&p
, float128_params
.frac_shift
);
4432 parts_silence_nan(&p
, status
);
4433 frac_shr(&p
, float128_params
.frac_shift
);
4434 return float128_pack_raw(&p
);
4437 /*----------------------------------------------------------------------------
4438 | If `a' is denormal and we are in flush-to-zero mode then set the
4439 | input-denormal exception and return zero. Otherwise just return the value.
4440 *----------------------------------------------------------------------------*/
4442 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
4444 if (p
.exp
== 0 && p
.frac
!= 0) {
4445 float_raise(float_flag_input_denormal
, status
);
4452 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
4454 if (status
->flush_inputs_to_zero
) {
4457 float16_unpack_raw(&p
, a
);
4458 if (parts_squash_denormal(p
, status
)) {
4459 return float16_set_sign(float16_zero
, p
.sign
);
4465 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4467 if (status
->flush_inputs_to_zero
) {
4470 float32_unpack_raw(&p
, a
);
4471 if (parts_squash_denormal(p
, status
)) {
4472 return float32_set_sign(float32_zero
, p
.sign
);
4478 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4480 if (status
->flush_inputs_to_zero
) {
4483 float64_unpack_raw(&p
, a
);
4484 if (parts_squash_denormal(p
, status
)) {
4485 return float64_set_sign(float64_zero
, p
.sign
);
4491 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4493 if (status
->flush_inputs_to_zero
) {
4496 bfloat16_unpack_raw(&p
, a
);
4497 if (parts_squash_denormal(p
, status
)) {
4498 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4504 /*----------------------------------------------------------------------------
4505 | Normalizes the subnormal extended double-precision floating-point value
4506 | represented by the denormalized significand `aSig'. The normalized exponent
4507 | and significand are stored at the locations pointed to by `zExpPtr' and
4508 | `zSigPtr', respectively.
4509 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal extended-precision significand: shift out the
 * leading zeros and compensate in the exponent (a normalized floatx80
 * has its integer bit set, so the biased exponent becomes 1 - shift).
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4521 /*----------------------------------------------------------------------------
4522 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4523 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4524 | and returns the proper extended double-precision floating-point value
4525 | corresponding to the abstract input. Ordinarily, the abstract value is
4526 | rounded and packed into the extended double-precision format, with the
4527 | inexact exception raised if the abstract input cannot be represented
4528 | exactly. However, if the abstract value is too large, the overflow and
4529 | inexact exceptions are raised and an infinity or maximal finite value is
4530 | returned. If the abstract value is too small, the input value is rounded to
4531 | a subnormal number, and the underflow and inexact exceptions are raised if
4532 | the abstract input cannot be represented exactly as a subnormal extended
4533 | double-precision floating-point number.
4534 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4535 | the result is rounded to the same number of bits as single or double
4536 | precision, respectively. Otherwise, the result is rounded to the full
4537 | precision of the extended double-precision format.
4538 | The input significand must be normalized or smaller. If the input
4539 | significand is not normalized, `zExp' must be 0; in that case, the result
4540 | returned is a subnormal number, and it must not require rounding. The
4541 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4542 | Floating-Point Arithmetic.
4543 *----------------------------------------------------------------------------*/
4545 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4546 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4547 float_status
*status
)
4549 FloatRoundMode roundingMode
;
4550 bool roundNearestEven
, increment
, isTiny
;
4551 int64_t roundIncrement
, roundMask
, roundBits
;
4553 roundingMode
= status
->float_rounding_mode
;
4554 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4555 switch (roundingPrecision
) {
4556 case floatx80_precision_x
:
4558 case floatx80_precision_d
:
4559 roundIncrement
= UINT64_C(0x0000000000000400);
4560 roundMask
= UINT64_C(0x00000000000007FF);
4562 case floatx80_precision_s
:
4563 roundIncrement
= UINT64_C(0x0000008000000000);
4564 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4567 g_assert_not_reached();
4569 zSig0
|= ( zSig1
!= 0 );
4570 switch (roundingMode
) {
4571 case float_round_nearest_even
:
4572 case float_round_ties_away
:
4574 case float_round_to_zero
:
4577 case float_round_up
:
4578 roundIncrement
= zSign
? 0 : roundMask
;
4580 case float_round_down
:
4581 roundIncrement
= zSign
? roundMask
: 0;
4586 roundBits
= zSig0
& roundMask
;
4587 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4588 if ( ( 0x7FFE < zExp
)
4589 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4594 if (status
->flush_to_zero
) {
4595 float_raise(float_flag_output_denormal
, status
);
4596 return packFloatx80(zSign
, 0, 0);
4598 isTiny
= status
->tininess_before_rounding
4600 || (zSig0
<= zSig0
+ roundIncrement
);
4601 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4603 roundBits
= zSig0
& roundMask
;
4604 if (isTiny
&& roundBits
) {
4605 float_raise(float_flag_underflow
, status
);
4608 float_raise(float_flag_inexact
, status
);
4610 zSig0
+= roundIncrement
;
4611 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4612 roundIncrement
= roundMask
+ 1;
4613 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4614 roundMask
|= roundIncrement
;
4616 zSig0
&= ~ roundMask
;
4617 return packFloatx80( zSign
, zExp
, zSig0
);
4621 float_raise(float_flag_inexact
, status
);
4623 zSig0
+= roundIncrement
;
4624 if ( zSig0
< roundIncrement
) {
4626 zSig0
= UINT64_C(0x8000000000000000);
4628 roundIncrement
= roundMask
+ 1;
4629 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4630 roundMask
|= roundIncrement
;
4632 zSig0
&= ~ roundMask
;
4633 if ( zSig0
== 0 ) zExp
= 0;
4634 return packFloatx80( zSign
, zExp
, zSig0
);
4636 switch (roundingMode
) {
4637 case float_round_nearest_even
:
4638 case float_round_ties_away
:
4639 increment
= ((int64_t)zSig1
< 0);
4641 case float_round_to_zero
:
4644 case float_round_up
:
4645 increment
= !zSign
&& zSig1
;
4647 case float_round_down
:
4648 increment
= zSign
&& zSig1
;
4653 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4654 if ( ( 0x7FFE < zExp
)
4655 || ( ( zExp
== 0x7FFE )
4656 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4662 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4663 if ( ( roundingMode
== float_round_to_zero
)
4664 || ( zSign
&& ( roundingMode
== float_round_up
) )
4665 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4667 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4669 return packFloatx80(zSign
,
4670 floatx80_infinity_high
,
4671 floatx80_infinity_low
);
4674 isTiny
= status
->tininess_before_rounding
4677 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4678 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4680 if (isTiny
&& zSig1
) {
4681 float_raise(float_flag_underflow
, status
);
4684 float_raise(float_flag_inexact
, status
);
4686 switch (roundingMode
) {
4687 case float_round_nearest_even
:
4688 case float_round_ties_away
:
4689 increment
= ((int64_t)zSig1
< 0);
4691 case float_round_to_zero
:
4694 case float_round_up
:
4695 increment
= !zSign
&& zSig1
;
4697 case float_round_down
:
4698 increment
= zSign
&& zSig1
;
4705 if (!(zSig1
<< 1) && roundNearestEven
) {
4708 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4710 return packFloatx80( zSign
, zExp
, zSig0
);
4714 float_raise(float_flag_inexact
, status
);
4720 zSig0
= UINT64_C(0x8000000000000000);
4723 if (!(zSig1
<< 1) && roundNearestEven
) {
4729 if ( zSig0
== 0 ) zExp
= 0;
4731 return packFloatx80( zSign
, zExp
, zSig0
);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/
4744 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4745 bool zSign
, int32_t zExp
,
4746 uint64_t zSig0
, uint64_t zSig1
,
4747 float_status
*status
)
4756 shiftCount
= clz64(zSig0
);
4757 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4759 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4760 zSig0
, zSig1
, status
);
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/
4782 static const float64 float32_exp2_coefficients
[15] =
4784 const_float64( 0x3ff0000000000000ll
), /* 1 */
4785 const_float64( 0x3fe0000000000000ll
), /* 2 */
4786 const_float64( 0x3fc5555555555555ll
), /* 3 */
4787 const_float64( 0x3fa5555555555555ll
), /* 4 */
4788 const_float64( 0x3f81111111111111ll
), /* 5 */
4789 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
4790 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
4791 const_float64( 0x3efa01a01a01a01all
), /* 8 */
4792 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
4793 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
4794 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
4795 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
4796 const_float64( 0x3de6124613a86d09ll
), /* 13 */
4797 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
4798 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
4801 float32
float32_exp2(float32 a
, float_status
*status
)
4803 FloatParts64 xp
, xnp
, tp
, rp
;
4806 float32_unpack_canonical(&xp
, a
, status
);
4807 if (unlikely(xp
.cls
!= float_class_normal
)) {
4809 case float_class_snan
:
4810 case float_class_qnan
:
4811 parts_return_nan(&xp
, status
);
4812 return float32_round_pack_canonical(&xp
, status
);
4813 case float_class_inf
:
4814 return xp
.sign
? float32_zero
: a
;
4815 case float_class_zero
:
4820 g_assert_not_reached();
4823 float_raise(float_flag_inexact
, status
);
4825 float64_unpack_canonical(&tp
, float64_ln2
, status
);
4826 xp
= *parts_mul(&xp
, &tp
, status
);
4829 float64_unpack_canonical(&rp
, float64_one
, status
);
4830 for (i
= 0 ; i
< 15 ; i
++) {
4831 float64_unpack_canonical(&tp
, float32_exp2_coefficients
[i
], status
);
4832 rp
= *parts_muladd(&tp
, &xp
, &rp
, 0, status
);
4833 xnp
= *parts_mul(&xnp
, &xp
, status
);
4836 return float32_round_pack_canonical(&rp
, status
);
4839 /*----------------------------------------------------------------------------
4840 | Rounds the extended double-precision floating-point value `a'
4841 | to the precision provided by floatx80_rounding_precision and returns the
4842 | result as an extended double-precision floating-point value.
4843 | The operation is performed according to the IEC/IEEE Standard for Binary
4844 | Floating-Point Arithmetic.
4845 *----------------------------------------------------------------------------*/
4847 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
4851 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
4852 return floatx80_default_nan(status
);
4854 return floatx80_round_pack_canonical(&p
, status
);
4857 static void __attribute__((constructor
)) softfloat_init(void)
4859 union_float64 ua
, ub
, uc
, ur
;
4861 if (QEMU_NO_HARDFLOAT
) {
4865 * Test that the host's FMA is not obviously broken. For example,
4866 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4867 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4869 ua
.s
= 0x0020000000000001ULL
;
4870 ub
.s
= 0x3ca0000000000000ULL
;
4871 uc
.s
= 0x0020000000000000ULL
;
4872 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
4873 if (ur
.s
!= 0x0020000000000001ULL
) {
4874 force_soft_fma
= true;