Merge tag 'pull-target-arm-20240308' of https://git.linaro.org/people/pmaydell/qemu...
[qemu/armbru.git] / fpu / softfloat.c
blob027a8e576d368223235c77b67de650faf836f9ec
1 /*
2 * QEMU float support
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
100 * Hardfloat
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
/*
 * Per (number of inputs, float size) switch selecting how the generated
 * hardfloat helpers classify their operands: 1 means fpclassify(), 0 means
 * the float32/64_* primitives.
 *
 * The 32-bit variants are 0 on every host; the 64-bit variants are 1 only
 * on x86_64 hosts.
 */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
/*
 * QEMU_HARDFLOAT_USE_ISINF selects isinf() instead of
 * float{32,64}_is_infinity when !USE_FP.
 * Using isinf() can give a ~6% speedup on x86_64/aarch64, whereas on
 * power64 it reduces fp-bench performance by up to 50%; hence it is only
 * enabled for the first two hosts.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   0
#else
# define QEMU_HARDFLOAT_USE_ISINF   1
#endif
/*
 * Some targets clear the FP flags before most FP operations.  That rules
 * out hardfloat, which relies on the inexact flag already being set.
 * Building with -ffast-math disables hardfloat as well, with a warning.
 *
 * QEMU_SOFTFLOAT_ATTR additionally carries noinline only when hardfloat
 * is enabled.
 */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
#endif

#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status *s)
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
256 typedef union {
257 float32 s;
258 float h;
259 } union_float32;
261 typedef union {
262 float64 s;
263 double h;
264 } union_float64;
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float (*hard_f32_op2_fn)(float a, float b);
272 typedef double (*hard_f64_op2_fn)(double a, double b);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
326 static inline bool f32_is_inf(union_float32 a)
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
331 return float32_is_infinity(a.s);
334 static inline bool f64_is_inf(union_float64 a)
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
339 return float64_is_infinity(a.s);
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345 f32_check_fn pre, f32_check_fn post)
347 union_float32 ua, ub, ur;
349 ua.s = xa;
350 ub.s = xb;
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
363 float_raise(float_flag_overflow, s);
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
367 return ur.s;
369 soft:
370 return soft(ua.s, ub.s, s);
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376 f64_check_fn pre, f64_check_fn post)
378 union_float64 ua, ub, ur;
380 ua.s = xa;
381 ub.s = xb;
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
394 float_raise(float_flag_overflow, s);
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
398 return ur.s;
400 soft:
401 return soft(ua.s, ub.s, s);
/*
 * Classification of a floating point number.  The two NaN classes are
 * the highest values, so "cls >= float_class_qnan" tests for any NaN.
 */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
418 #define float_cmask(bit) (1u << (bit))
420 enum {
421 float_cmask_zero = float_cmask(float_class_zero),
422 float_cmask_normal = float_cmask(float_class_normal),
423 float_cmask_inf = float_cmask(float_class_inf),
424 float_cmask_qnan = float_cmask(float_class_qnan),
425 float_cmask_snan = float_cmask(float_class_snan),
427 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
/* Flags for parts_minmax; independent bits that may be OR-ed together. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin    = 1 << 0,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum    = 1 << 1,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag    = 1 << 2,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 1 << 3,
};
446 /* Simple helpers for checking if, or what kind of, NaN we have */
447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
449 return unlikely(c >= float_class_qnan);
452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
454 return c == float_class_snan;
457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
459 return c == float_class_qnan;
463 * Structure holding all of the decomposed parts of a float.
464 * The exponent is unbiased and the fraction is normalized.
466 * The fraction words are stored in big-endian word ordering,
467 * so that truncation from a larger format to a smaller format
468 * can be done simply by ignoring subsequent elements.
471 typedef struct {
472 FloatClass cls;
473 bool sign;
474 int32_t exp;
475 union {
476 /* Routines that know the structure may reference the singular name. */
477 uint64_t frac;
479 * Routines expanded with multiple structures reference "hi" and "lo"
480 * depending on the operation. In FloatParts64, "hi" and "lo" are
481 * both the same word and aliased here.
483 uint64_t frac_hi;
484 uint64_t frac_lo;
486 } FloatParts64;
488 typedef struct {
489 FloatClass cls;
490 bool sign;
491 int32_t exp;
492 uint64_t frac_hi;
493 uint64_t frac_lo;
494 } FloatParts128;
496 typedef struct {
497 FloatClass cls;
498 bool sign;
499 int32_t exp;
500 uint64_t frac_hi;
501 uint64_t frac_hm; /* high-middle */
502 uint64_t frac_lm; /* low-middle */
503 uint64_t frac_lo;
504 } FloatParts256;
/*
 * These apply to the most significant word of each FloatPartsN:
 * the bit index of the binary point, and the mask for that bit.
 */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
/*
 * Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_re_bias: NOTE(review): not described in the original comment;
 *     computed as 3 * 2^(exp_size - 2) by FLOAT_PARAMS_ -- presumably an
 *     exponent re-bias for trapped overflow/underflow results; confirm
 *     against its users.
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following is computed based on the size of the fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   m68k_denormal: explicit integer bit for extended precision may be 1
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;
/*
 * Expand the FloatFmt fields from the size of the exponent (E) and of the
 * fraction (F), both in bits.
 *
 * Every use of a macro argument is parenthesized so that an expression
 * argument (e.g. "20 + 3") expands with the intended precedence.
 */
#define FLOAT_PARAMS_(E)                                        \
    .exp_size       = (E),                                      \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                    \
    .exp_re_bias    = (1 << ((E) - 1)) + (1 << ((E) - 2)),      \
    .exp_max        = (1 << (E)) - 1

/*
 * frac_shift moves the top of the F-bit fraction to just below bit 63;
 * round_mask covers all the bits below the fraction's lsb after that shift.
 */
#define FLOAT_PARAMS(E, F)                                      \
    FLOAT_PARAMS_(E),                                           \
    .frac_size      = (F),                                      \
    .frac_shift     = (-(F) - 1) & 63,                          \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1
547 static const FloatFmt float16_params = {
548 FLOAT_PARAMS(5, 10)
551 static const FloatFmt float16_params_ahp = {
552 FLOAT_PARAMS(5, 10),
553 .arm_althp = true
556 static const FloatFmt bfloat16_params = {
557 FLOAT_PARAMS(8, 7)
560 static const FloatFmt float32_params = {
561 FLOAT_PARAMS(8, 23)
564 static const FloatFmt float64_params = {
565 FLOAT_PARAMS(11, 52)
568 static const FloatFmt float128_params = {
569 FLOAT_PARAMS(15, 112)
/*
 * Expand the FloatFmt fields for floatx80 at rounding precision R.
 *
 * The exponent is always 15 bits and the fraction is never shifted.
 * R == 64 selects full precision: a 63-bit fraction field and an all-ones
 * round_mask.  Any other R gives an R-bit fraction with a round_mask
 * covering the bits below it.
 *
 * R is parenthesized at every use so that an expression argument expands
 * with the intended precedence.
 */
#define FLOATX80_PARAMS(R)                                      \
    FLOAT_PARAMS_(15),                                          \
    .frac_size = (R) == 64 ? 63 : (R),                          \
    .frac_shift = 0,                                            \
    .round_mask = (R) == 64 ? -1 : (1ull << ((-(R) - 1) & 63)) - 1
578 static const FloatFmt floatx80_params[3] = {
579 [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
580 [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
581 [floatx80_precision_x] = {
582 FLOATX80_PARAMS(64),
583 #ifdef TARGET_M68K
584 .m68k_denormal = true,
585 #endif
589 /* Unpack a float to parts, but do not canonicalize. */
590 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
592 const int f_size = fmt->frac_size;
593 const int e_size = fmt->exp_size;
595 *r = (FloatParts64) {
596 .cls = float_class_unclassified,
597 .sign = extract64(raw, f_size + e_size, 1),
598 .exp = extract64(raw, f_size, e_size),
599 .frac = extract64(raw, 0, f_size)
603 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
605 unpack_raw64(p, &float16_params, f);
608 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
610 unpack_raw64(p, &bfloat16_params, f);
613 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
615 unpack_raw64(p, &float32_params, f);
618 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
620 unpack_raw64(p, &float64_params, f);
623 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
625 *p = (FloatParts128) {
626 .cls = float_class_unclassified,
627 .sign = extract32(f.high, 15, 1),
628 .exp = extract32(f.high, 0, 15),
629 .frac_hi = f.low
633 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
635 const int f_size = float128_params.frac_size - 64;
636 const int e_size = float128_params.exp_size;
638 *p = (FloatParts128) {
639 .cls = float_class_unclassified,
640 .sign = extract64(f.high, f_size + e_size, 1),
641 .exp = extract64(f.high, f_size, e_size),
642 .frac_hi = extract64(f.high, 0, f_size),
643 .frac_lo = f.low,
647 /* Pack a float from parts, but do not canonicalize. */
648 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
650 const int f_size = fmt->frac_size;
651 const int e_size = fmt->exp_size;
652 uint64_t ret;
654 ret = (uint64_t)p->sign << (f_size + e_size);
655 ret = deposit64(ret, f_size, e_size, p->exp);
656 ret = deposit64(ret, 0, f_size, p->frac);
657 return ret;
660 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
662 return make_float16(pack_raw64(p, &float16_params));
665 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
667 return pack_raw64(p, &bfloat16_params);
670 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
672 return make_float32(pack_raw64(p, &float32_params));
675 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
677 return make_float64(pack_raw64(p, &float64_params));
680 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
682 const int f_size = float128_params.frac_size - 64;
683 const int e_size = float128_params.exp_size;
684 uint64_t hi;
686 hi = (uint64_t)p->sign << (f_size + e_size);
687 hi = deposit64(hi, f_size, e_size, p->exp);
688 hi = deposit64(hi, 0, f_size, p->frac_hi);
689 return make_float128(hi, p->frac_lo);
692 /*----------------------------------------------------------------------------
693 | Functions and definitions to determine: (1) whether tininess for underflow
694 | is detected before or after rounding by default, (2) what (if anything)
695 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
696 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
697 | are propagated from function inputs to output. These details are target-
698 | specific.
699 *----------------------------------------------------------------------------*/
700 #include "softfloat-specialize.c.inc"
/*
 * Select the partsN_NAME function that matches the pointer type of P.
 * The first form handles FloatParts64/128; the second adds FloatParts256.
 */
#define PARTS_GENERIC_64_128(NAME, P)                   \
    _Generic((P),                                       \
             FloatParts64 *: parts64_##NAME,            \
             FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P)               \
    _Generic((P),                                       \
             FloatParts64 *: parts64_##NAME,            \
             FloatParts128 *: parts128_##NAME,          \
             FloatParts256 *: parts256_##NAME)
711 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
712 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
714 static void parts64_return_nan(FloatParts64 *a, float_status *s);
715 static void parts128_return_nan(FloatParts128 *a, float_status *s);
717 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
719 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
720 float_status *s);
721 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
722 float_status *s);
724 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
726 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
727 FloatParts64 *c, float_status *s,
728 int ab_mask, int abc_mask);
729 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
730 FloatParts128 *b,
731 FloatParts128 *c,
732 float_status *s,
733 int ab_mask, int abc_mask);
735 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
736 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
738 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
739 const FloatFmt *fmt);
740 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
741 const FloatFmt *fmt);
743 #define parts_canonicalize(A, S, F) \
744 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
746 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
747 const FloatFmt *fmt);
748 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
749 const FloatFmt *fmt);
751 #define parts_uncanon_normal(A, S, F) \
752 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
754 static void parts64_uncanon(FloatParts64 *p, float_status *status,
755 const FloatFmt *fmt);
756 static void parts128_uncanon(FloatParts128 *p, float_status *status,
757 const FloatFmt *fmt);
759 #define parts_uncanon(A, S, F) \
760 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
762 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
763 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
764 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
766 #define parts_add_normal(A, B) \
767 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
769 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
770 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
771 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
773 #define parts_sub_normal(A, B) \
774 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
776 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
777 float_status *s, bool subtract);
778 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
779 float_status *s, bool subtract);
781 #define parts_addsub(A, B, S, Z) \
782 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
784 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
785 float_status *s);
786 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
787 float_status *s);
789 #define parts_mul(A, B, S) \
790 PARTS_GENERIC_64_128(mul, A)(A, B, S)
792 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
793 FloatParts64 *c, int flags,
794 float_status *s);
795 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
796 FloatParts128 *c, int flags,
797 float_status *s);
799 #define parts_muladd(A, B, C, Z, S) \
800 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
802 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
803 float_status *s);
804 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
805 float_status *s);
807 #define parts_div(A, B, S) \
808 PARTS_GENERIC_64_128(div, A)(A, B, S)
810 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
811 uint64_t *mod_quot, float_status *s);
812 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
813 uint64_t *mod_quot, float_status *s);
815 #define parts_modrem(A, B, Q, S) \
816 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
818 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
819 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
821 #define parts_sqrt(A, S, F) \
822 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
824 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
825 int scale, int frac_size);
826 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
827 int scale, int frac_size);
829 #define parts_round_to_int_normal(A, R, C, F) \
830 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
832 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
833 int scale, float_status *s,
834 const FloatFmt *fmt);
835 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
836 int scale, float_status *s,
837 const FloatFmt *fmt);
839 #define parts_round_to_int(A, R, C, S, F) \
840 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
842 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
843 int scale, int64_t min, int64_t max,
844 float_status *s);
845 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
846 int scale, int64_t min, int64_t max,
847 float_status *s);
849 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
850 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
852 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
853 int scale, uint64_t max,
854 float_status *s);
855 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
856 int scale, uint64_t max,
857 float_status *s);
859 #define parts_float_to_uint(P, R, Z, M, S) \
860 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
862 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
863 FloatRoundMode rmode,
864 int bitsm1, float_status *s);
865 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
866 FloatRoundMode rmode,
867 int bitsm1, float_status *s);
869 #define parts_float_to_sint_modulo(P, R, M, S) \
870 PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
872 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
873 int scale, float_status *s);
874 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
875 int scale, float_status *s);
877 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
878 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
880 #define parts_sint_to_float(P, I, Z, S) \
881 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
883 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
884 int scale, float_status *s);
885 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
886 int scale, float_status *s);
888 #define parts_uint_to_float(P, I, Z, S) \
889 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
891 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
892 float_status *s, int flags);
893 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
894 float_status *s, int flags);
896 #define parts_minmax(A, B, S, F) \
897 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
899 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
900 float_status *s, bool q);
901 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
902 float_status *s, bool q);
904 #define parts_compare(A, B, S, Q) \
905 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
907 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
908 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
910 #define parts_scalbn(A, N, S) \
911 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
913 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
914 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
916 #define parts_log2(A, S, F) \
917 PARTS_GENERIC_64_128(log2, A)(A, S, F)
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 *
 * The FRAC_GENERIC_* macros select the fracN_NAME helper matching the
 * pointer type of P.
 */
#define FRAC_GENERIC_64_128(NAME, P)                    \
    _Generic((P),                                       \
             FloatParts64 *: frac64_##NAME,             \
             FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P)                \
    _Generic((P),                                       \
             FloatParts64 *: frac64_##NAME,             \
             FloatParts128 *: frac128_##NAME,           \
             FloatParts256 *: frac256_##NAME)
932 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
934 return uadd64_overflow(a->frac, b->frac, &r->frac);
937 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
939 bool c = 0;
940 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
941 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
942 return c;
945 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
947 bool c = 0;
948 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
949 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
950 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
951 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
952 return c;
/* Size-generic fraction add; the implementation is picked from the type of R. */
#define frac_add(R, A, B)                           \
    FRAC_GENERIC_64_128_256(add, R)(R, A, B)
957 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
959 return uadd64_overflow(a->frac, c, &r->frac);
962 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
964 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
965 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
/* Size-generic add-immediate; the implementation is picked from the type of R. */
#define frac_addi(R, A, C)                          \
    FRAC_GENERIC_64_128(addi, R)(R, A, C)
970 static void frac64_allones(FloatParts64 *a)
972 a->frac = -1;
975 static void frac128_allones(FloatParts128 *a)
977 a->frac_hi = a->frac_lo = -1;
/* Size-generic "set all fraction bits"; picked from the type of A. */
#define frac_allones(A)                             \
    FRAC_GENERIC_64_128(allones, A)(A)
982 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
984 return (a->frac == b->frac ? float_relation_equal
985 : a->frac < b->frac ? float_relation_less
986 : float_relation_greater);
989 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
991 uint64_t ta = a->frac_hi, tb = b->frac_hi;
992 if (ta == tb) {
993 ta = a->frac_lo, tb = b->frac_lo;
994 if (ta == tb) {
995 return float_relation_equal;
998 return ta < tb ? float_relation_less : float_relation_greater;
/* Size-generic fraction compare; picked from the type of A. */
#define frac_cmp(A, B)                              \
    FRAC_GENERIC_64_128(cmp, A)(A, B)
1003 static void frac64_clear(FloatParts64 *a)
1005 a->frac = 0;
1008 static void frac128_clear(FloatParts128 *a)
1010 a->frac_hi = a->frac_lo = 0;
/* Size-generic fraction clear; picked from the type of A. */
#define frac_clear(A)                               \
    FRAC_GENERIC_64_128(clear, A)(A)
1015 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1017 uint64_t n1, n0, r, q;
1018 bool ret;
1021 * We want a 2*N / N-bit division to produce exactly an N-bit
1022 * result, so that we do not lose any precision and so that we
1023 * do not have to renormalize afterward. If A.frac < B.frac,
1024 * then division would produce an (N-1)-bit result; shift A left
1025 * by one to produce the an N-bit result, and return true to
1026 * decrement the exponent to match.
1028 * The udiv_qrnnd algorithm that we're using requires normalization,
1029 * i.e. the msb of the denominator must be set, which is already true.
1031 ret = a->frac < b->frac;
1032 if (ret) {
1033 n0 = a->frac;
1034 n1 = 0;
1035 } else {
1036 n0 = a->frac >> 1;
1037 n1 = a->frac << 63;
1039 q = udiv_qrnnd(&r, n0, n1, b->frac);
1041 /* Set lsb if there is a remainder, to set inexact. */
1042 a->frac = q | (r != 0);
1044 return ret;
1047 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1049 uint64_t q0, q1, a0, a1, b0, b1;
1050 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1051 bool ret = false;
1053 a0 = a->frac_hi, a1 = a->frac_lo;
1054 b0 = b->frac_hi, b1 = b->frac_lo;
1056 ret = lt128(a0, a1, b0, b1);
1057 if (!ret) {
1058 a1 = shr_double(a0, a1, 1);
1059 a0 = a0 >> 1;
1062 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1063 q0 = estimateDiv128To64(a0, a1, b0);
1066 * Estimate is high because B1 was not included (unless B1 == 0).
1067 * Reduce quotient and increase remainder until remainder is non-negative.
1068 * This loop will execute 0 to 2 times.
1070 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1071 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1072 while (r0 != 0) {
1073 q0--;
1074 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1077 /* Repeat using the remainder, producing a second word of quotient. */
1078 q1 = estimateDiv128To64(r1, r2, b0);
1079 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1080 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1081 while (r1 != 0) {
1082 q1--;
1083 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1086 /* Any remainder indicates inexact; set sticky bit. */
1087 q1 |= (r2 | r3) != 0;
1089 a->frac_hi = q0;
1090 a->frac_lo = q1;
1091 return ret;
/* Size-generic fraction divide; picked from the type of A. */
#define frac_div(A, B)                              \
    FRAC_GENERIC_64_128(div, A)(A, B)
1096 static bool frac64_eqz(FloatParts64 *a)
1098 return a->frac == 0;
1101 static bool frac128_eqz(FloatParts128 *a)
1103 return (a->frac_hi | a->frac_lo) == 0;
/* Size-generic "fraction is zero" test; picked from the type of A. */
#define frac_eqz(A)                                 \
    FRAC_GENERIC_64_128(eqz, A)(A)
1108 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1110 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1113 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1115 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1116 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
/*
 * Size-generic widening multiply.  Note that the implementation is
 * picked from the type of the operand A, not the (wider) result R.
 */
#define frac_mulw(R, A, B)                          \
    FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1121 static void frac64_neg(FloatParts64 *a)
1123 a->frac = -a->frac;
1126 static void frac128_neg(FloatParts128 *a)
1128 bool c = 0;
1129 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1130 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1133 static void frac256_neg(FloatParts256 *a)
1135 bool c = 0;
1136 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1138 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1139 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
/* Size-generic fraction negate; picked from the type of A. */
#define frac_neg(A)                                 \
    FRAC_GENERIC_64_128_256(neg, A)(A)
1144 static int frac64_normalize(FloatParts64 *a)
1146 if (a->frac) {
1147 int shift = clz64(a->frac);
1148 a->frac <<= shift;
1149 return shift;
1151 return 64;
1154 static int frac128_normalize(FloatParts128 *a)
1156 if (a->frac_hi) {
1157 int shl = clz64(a->frac_hi);
1158 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1159 a->frac_lo <<= shl;
1160 return shl;
1161 } else if (a->frac_lo) {
1162 int shl = clz64(a->frac_lo);
1163 a->frac_hi = a->frac_lo << shl;
1164 a->frac_lo = 0;
1165 return shl + 64;
1167 return 128;
1170 static int frac256_normalize(FloatParts256 *a)
1172 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1173 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1174 int ret, shl;
1176 if (likely(a0)) {
1177 shl = clz64(a0);
1178 if (shl == 0) {
1179 return 0;
1181 ret = shl;
1182 } else {
1183 if (a1) {
1184 ret = 64;
1185 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1186 } else if (a2) {
1187 ret = 128;
1188 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1189 } else if (a3) {
1190 ret = 192;
1191 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1192 } else {
1193 ret = 256;
1194 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1195 goto done;
1197 shl = clz64(a0);
1198 if (shl == 0) {
1199 goto done;
1201 ret += shl;
1204 a0 = shl_double(a0, a1, shl);
1205 a1 = shl_double(a1, a2, shl);
1206 a2 = shl_double(a2, a3, shl);
1207 a3 <<= shl;
1209 done:
1210 a->frac_hi = a0;
1211 a->frac_hm = a1;
1212 a->frac_lm = a2;
1213 a->frac_lo = a3;
1214 return ret;
/* Size-generic fraction normalize; picked from the type of A. */
#define frac_normalize(A)                           \
    FRAC_GENERIC_64_128_256(normalize, A)(A)
1219 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1221 uint64_t a0, a1, b0, t0, t1, q, quot;
1222 int exp_diff = a->exp - b->exp;
1223 int shift;
1225 a0 = a->frac;
1226 a1 = 0;
1228 if (exp_diff < -1) {
1229 if (mod_quot) {
1230 *mod_quot = 0;
1232 return;
1234 if (exp_diff == -1) {
1235 a0 >>= 1;
1236 exp_diff = 0;
1239 b0 = b->frac;
1240 quot = q = b0 <= a0;
1241 if (q) {
1242 a0 -= b0;
1245 exp_diff -= 64;
1246 while (exp_diff > 0) {
1247 q = estimateDiv128To64(a0, a1, b0);
1248 q = q > 2 ? q - 2 : 0;
1249 mul64To128(b0, q, &t0, &t1);
1250 sub128(a0, a1, t0, t1, &a0, &a1);
1251 shortShift128Left(a0, a1, 62, &a0, &a1);
1252 exp_diff -= 62;
1253 quot = (quot << 62) + q;
1256 exp_diff += 64;
1257 if (exp_diff > 0) {
1258 q = estimateDiv128To64(a0, a1, b0);
1259 q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1260 mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1261 sub128(a0, a1, t0, t1, &a0, &a1);
1262 shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1263 while (le128(t0, t1, a0, a1)) {
1264 ++q;
1265 sub128(a0, a1, t0, t1, &a0, &a1);
1267 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1268 } else {
1269 t0 = b0;
1270 t1 = 0;
1273 if (mod_quot) {
1274 *mod_quot = quot;
1275 } else {
1276 sub128(t0, t1, a0, a1, &t0, &t1);
1277 if (lt128(t0, t1, a0, a1) ||
1278 (eq128(t0, t1, a0, a1) && (q & 1))) {
1279 a0 = t0;
1280 a1 = t1;
1281 a->sign = !a->sign;
1285 if (likely(a0)) {
1286 shift = clz64(a0);
1287 shortShift128Left(a0, a1, shift, &a0, &a1);
1288 } else if (likely(a1)) {
1289 shift = clz64(a1);
1290 a0 = a1 << shift;
1291 a1 = 0;
1292 shift += 64;
1293 } else {
1294 a->cls = float_class_zero;
1295 return;
1298 a->exp = b->exp + exp_diff - shift;
1299 a->frac = a0 | (a1 != 0);
1302 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1303 uint64_t *mod_quot)
1305 uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1306 int exp_diff = a->exp - b->exp;
1307 int shift;
1309 a0 = a->frac_hi;
1310 a1 = a->frac_lo;
1311 a2 = 0;
1313 if (exp_diff < -1) {
1314 if (mod_quot) {
1315 *mod_quot = 0;
1317 return;
1319 if (exp_diff == -1) {
1320 shift128Right(a0, a1, 1, &a0, &a1);
1321 exp_diff = 0;
1324 b0 = b->frac_hi;
1325 b1 = b->frac_lo;
1327 quot = q = le128(b0, b1, a0, a1);
1328 if (q) {
1329 sub128(a0, a1, b0, b1, &a0, &a1);
1332 exp_diff -= 64;
1333 while (exp_diff > 0) {
1334 q = estimateDiv128To64(a0, a1, b0);
1335 q = q > 4 ? q - 4 : 0;
1336 mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1337 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1338 shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1339 exp_diff -= 61;
1340 quot = (quot << 61) + q;
1343 exp_diff += 64;
1344 if (exp_diff > 0) {
1345 q = estimateDiv128To64(a0, a1, b0);
1346 q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1347 mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1348 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1349 shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1350 while (le192(t0, t1, t2, a0, a1, a2)) {
1351 ++q;
1352 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1354 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1355 } else {
1356 t0 = b0;
1357 t1 = b1;
1358 t2 = 0;
1361 if (mod_quot) {
1362 *mod_quot = quot;
1363 } else {
1364 sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1365 if (lt192(t0, t1, t2, a0, a1, a2) ||
1366 (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1367 a0 = t0;
1368 a1 = t1;
1369 a2 = t2;
1370 a->sign = !a->sign;
1374 if (likely(a0)) {
1375 shift = clz64(a0);
1376 shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1377 } else if (likely(a1)) {
1378 shift = clz64(a1);
1379 shortShift128Left(a1, a2, shift, &a0, &a1);
1380 a2 = 0;
1381 shift += 64;
1382 } else if (likely(a2)) {
1383 shift = clz64(a2);
1384 a0 = a2 << shift;
1385 a1 = a2 = 0;
1386 shift += 128;
1387 } else {
1388 a->cls = float_class_zero;
1389 return;
1392 a->exp = b->exp + exp_diff - shift;
1393 a->frac_hi = a0;
1394 a->frac_lo = a1 | (a2 != 0);
/* Size-generic modulus/remainder; picked from the type of A. */
#define frac_modrem(A, B, Q)                        \
    FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1399 static void frac64_shl(FloatParts64 *a, int c)
1401 a->frac <<= c;
1404 static void frac128_shl(FloatParts128 *a, int c)
1406 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1408 if (c & 64) {
1409 a0 = a1, a1 = 0;
1412 c &= 63;
1413 if (c) {
1414 a0 = shl_double(a0, a1, c);
1415 a1 = a1 << c;
1418 a->frac_hi = a0;
1419 a->frac_lo = a1;
/* Size-generic fraction shift-left; picked from the type of A. */
#define frac_shl(A, C)                              \
    FRAC_GENERIC_64_128(shl, A)(A, C)
1424 static void frac64_shr(FloatParts64 *a, int c)
1426 a->frac >>= c;
1429 static void frac128_shr(FloatParts128 *a, int c)
1431 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1433 if (c & 64) {
1434 a1 = a0, a0 = 0;
1437 c &= 63;
1438 if (c) {
1439 a1 = shr_double(a0, a1, c);
1440 a0 = a0 >> c;
1443 a->frac_hi = a0;
1444 a->frac_lo = a1;
/* Size-generic fraction shift-right; picked from the type of A. */
#define frac_shr(A, C)                              \
    FRAC_GENERIC_64_128(shr, A)(A, C)
1449 static void frac64_shrjam(FloatParts64 *a, int c)
1451 uint64_t a0 = a->frac;
1453 if (likely(c != 0)) {
1454 if (likely(c < 64)) {
1455 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1456 } else {
1457 a0 = a0 != 0;
1459 a->frac = a0;
1463 static void frac128_shrjam(FloatParts128 *a, int c)
1465 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1466 uint64_t sticky = 0;
1468 if (unlikely(c == 0)) {
1469 return;
1470 } else if (likely(c < 64)) {
1471 /* nothing */
1472 } else if (likely(c < 128)) {
1473 sticky = a1;
1474 a1 = a0;
1475 a0 = 0;
1476 c &= 63;
1477 if (c == 0) {
1478 goto done;
1480 } else {
1481 sticky = a0 | a1;
1482 a0 = a1 = 0;
1483 goto done;
1486 sticky |= shr_double(a1, 0, c);
1487 a1 = shr_double(a0, a1, c);
1488 a0 = a0 >> c;
1490 done:
1491 a->frac_lo = a1 | (sticky != 0);
1492 a->frac_hi = a0;
1495 static void frac256_shrjam(FloatParts256 *a, int c)
1497 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1498 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1499 uint64_t sticky = 0;
1501 if (unlikely(c == 0)) {
1502 return;
1503 } else if (likely(c < 64)) {
1504 /* nothing */
1505 } else if (likely(c < 256)) {
1506 if (unlikely(c & 128)) {
1507 sticky |= a2 | a3;
1508 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1510 if (unlikely(c & 64)) {
1511 sticky |= a3;
1512 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1514 c &= 63;
1515 if (c == 0) {
1516 goto done;
1518 } else {
1519 sticky = a0 | a1 | a2 | a3;
1520 a0 = a1 = a2 = a3 = 0;
1521 goto done;
1524 sticky |= shr_double(a3, 0, c);
1525 a3 = shr_double(a2, a3, c);
1526 a2 = shr_double(a1, a2, c);
1527 a1 = shr_double(a0, a1, c);
1528 a0 = a0 >> c;
1530 done:
1531 a->frac_lo = a3 | (sticky != 0);
1532 a->frac_lm = a2;
1533 a->frac_hm = a1;
1534 a->frac_hi = a0;
/* Size-generic shift-right-and-jam; picked from the type of A. */
#define frac_shrjam(A, C)                           \
    FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1539 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1541 return usub64_overflow(a->frac, b->frac, &r->frac);
1544 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1546 bool c = 0;
1547 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1548 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1549 return c;
1552 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1554 bool c = 0;
1555 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1556 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1557 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1558 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1559 return c;
/* Size-generic fraction subtract; picked from the type of R. */
#define frac_sub(R, A, B)                           \
    FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1564 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1566 r->frac = a->frac_hi | (a->frac_lo != 0);
1569 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1571 r->frac_hi = a->frac_hi;
1572 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
/* Size-generic truncate-and-jam; picked from the type of the narrow result R. */
#define frac_truncjam(R, A)                         \
    FRAC_GENERIC_64_128(truncjam, R)(R, A)
1577 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1579 r->frac_hi = a->frac;
1580 r->frac_lo = 0;
1583 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1585 r->frac_hi = a->frac_hi;
1586 r->frac_hm = a->frac_lo;
1587 r->frac_lm = 0;
1588 r->frac_lo = 0;
/*
 * Size-generic widen.  A is the wide destination and B the narrow
 * source; the implementation is picked from the type of B.
 */
#define frac_widen(A, B)                            \
    FRAC_GENERIC_64_128(widen, B)(A, B)
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
/* 128-entry table of 16-bit constants, eight per row. */
static const uint16_t rsqrt_tab[128] = {
    /* 0x00 */ 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    /* 0x08 */ 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    /* 0x10 */ 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    /* 0x18 */ 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    /* 0x20 */ 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    /* 0x28 */ 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    /* 0x30 */ 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    /* 0x38 */ 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    /* 0x40 */ 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    /* 0x48 */ 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    /* 0x50 */ 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    /* 0x58 */ 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    /* 0x60 */ 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    /* 0x68 */ 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    /* 0x70 */ 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    /* 0x78 */ 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1617 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1618 #define FloatPartsN glue(FloatParts,N)
1619 #define FloatPartsW glue(FloatParts,W)
1621 #define N 64
1622 #define W 128
1624 #include "softfloat-parts-addsub.c.inc"
1625 #include "softfloat-parts.c.inc"
1627 #undef N
1628 #undef W
1629 #define N 128
1630 #define W 256
1632 #include "softfloat-parts-addsub.c.inc"
1633 #include "softfloat-parts.c.inc"
1635 #undef N
1636 #undef W
1637 #define N 256
1639 #include "softfloat-parts-addsub.c.inc"
1641 #undef N
1642 #undef W
1643 #undef partsN
1644 #undef FloatPartsN
1645 #undef FloatPartsW
/*
 * Pack/unpack routines with a specific FloatFmt.
 */
1651 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1652 float_status *s, const FloatFmt *params)
1654 float16_unpack_raw(p, f);
1655 parts_canonicalize(p, s, params);
1658 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1659 float_status *s)
1661 float16a_unpack_canonical(p, f, s, &float16_params);
1664 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1665 float_status *s)
1667 bfloat16_unpack_raw(p, f);
1668 parts_canonicalize(p, s, &bfloat16_params);
1671 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1672 float_status *s,
1673 const FloatFmt *params)
1675 parts_uncanon(p, s, params);
1676 return float16_pack_raw(p);
1679 static float16 float16_round_pack_canonical(FloatParts64 *p,
1680 float_status *s)
1682 return float16a_round_pack_canonical(p, s, &float16_params);
1685 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1686 float_status *s)
1688 parts_uncanon(p, s, &bfloat16_params);
1689 return bfloat16_pack_raw(p);
1692 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1693 float_status *s)
1695 float32_unpack_raw(p, f);
1696 parts_canonicalize(p, s, &float32_params);
1699 static float32 float32_round_pack_canonical(FloatParts64 *p,
1700 float_status *s)
1702 parts_uncanon(p, s, &float32_params);
1703 return float32_pack_raw(p);
1706 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1707 float_status *s)
1709 float64_unpack_raw(p, f);
1710 parts_canonicalize(p, s, &float64_params);
1713 static float64 float64_round_pack_canonical(FloatParts64 *p,
1714 float_status *s)
1716 parts_uncanon(p, s, &float64_params);
1717 return float64_pack_raw(p);
1720 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1721 float_status *s)
1723 parts_uncanon(p, s, &float32_params);
1726 * In parts_uncanon, we placed the fraction for float32 at the lsb.
1727 * We need to adjust the fraction higher so that the least N bits are
1728 * zero, and the fraction is adjacent to the float64 implicit bit.
1730 switch (p->cls) {
1731 case float_class_normal:
1732 if (unlikely(p->exp == 0)) {
1734 * The result is denormal for float32, but can be represented
1735 * in normalized form for float64. Adjust, per canonicalize.
1737 int shift = frac_normalize(p);
1738 p->exp = (float32_params.frac_shift -
1739 float32_params.exp_bias - shift + 1 +
1740 float64_params.exp_bias);
1741 frac_shr(p, float64_params.frac_shift);
1742 } else {
1743 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1744 p->exp += float64_params.exp_bias - float32_params.exp_bias;
1746 break;
1747 case float_class_snan:
1748 case float_class_qnan:
1749 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1750 p->exp = float64_params.exp_max;
1751 break;
1752 case float_class_inf:
1753 p->exp = float64_params.exp_max;
1754 break;
1755 case float_class_zero:
1756 break;
1757 default:
1758 g_assert_not_reached();
1761 return float64_pack_raw(p);
1764 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1765 float_status *s)
1767 float128_unpack_raw(p, f);
1768 parts_canonicalize(p, s, &float128_params);
1771 static float128 float128_round_pack_canonical(FloatParts128 *p,
1772 float_status *s)
1774 parts_uncanon(p, s, &float128_params);
1775 return float128_pack_raw(p);
1778 /* Returns false if the encoding is invalid. */
1779 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1780 float_status *s)
1782 /* Ensure rounding precision is set before beginning. */
1783 switch (s->floatx80_rounding_precision) {
1784 case floatx80_precision_x:
1785 case floatx80_precision_d:
1786 case floatx80_precision_s:
1787 break;
1788 default:
1789 g_assert_not_reached();
1792 if (unlikely(floatx80_invalid_encoding(f))) {
1793 float_raise(float_flag_invalid, s);
1794 return false;
1797 floatx80_unpack_raw(p, f);
1799 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1800 parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1801 } else {
1802 /* The explicit integer bit is ignored, after invalid checks. */
1803 p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1804 p->cls = (p->frac_hi == 0 ? float_class_inf
1805 : parts_is_snan_frac(p->frac_hi, s)
1806 ? float_class_snan : float_class_qnan);
1808 return true;
1811 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1812 float_status *s)
1814 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1815 uint64_t frac;
1816 int exp;
1818 switch (p->cls) {
1819 case float_class_normal:
1820 if (s->floatx80_rounding_precision == floatx80_precision_x) {
1821 parts_uncanon_normal(p, s, fmt);
1822 frac = p->frac_hi;
1823 exp = p->exp;
1824 } else {
1825 FloatParts64 p64;
1827 p64.sign = p->sign;
1828 p64.exp = p->exp;
1829 frac_truncjam(&p64, p);
1830 parts_uncanon_normal(&p64, s, fmt);
1831 frac = p64.frac;
1832 exp = p64.exp;
1834 if (exp != fmt->exp_max) {
1835 break;
1837 /* rounded to inf -- fall through to set frac correctly */
1839 case float_class_inf:
1840 /* x86 and m68k differ in the setting of the integer bit. */
1841 frac = floatx80_infinity_low;
1842 exp = fmt->exp_max;
1843 break;
1845 case float_class_zero:
1846 frac = 0;
1847 exp = 0;
1848 break;
1850 case float_class_snan:
1851 case float_class_qnan:
1852 /* NaNs have the integer bit set. */
1853 frac = p->frac_hi | (1ull << 63);
1854 exp = fmt->exp_max;
1855 break;
1857 default:
1858 g_assert_not_reached();
1861 return packFloatx80(p->sign, exp, frac);
/*
 * Addition and subtraction
 */
1868 static float16 QEMU_FLATTEN
1869 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1871 FloatParts64 pa, pb, *pr;
1873 float16_unpack_canonical(&pa, a, status);
1874 float16_unpack_canonical(&pb, b, status);
1875 pr = parts_addsub(&pa, &pb, status, subtract);
1877 return float16_round_pack_canonical(pr, status);
1880 float16 float16_add(float16 a, float16 b, float_status *status)
1882 return float16_addsub(a, b, status, false);
1885 float16 float16_sub(float16 a, float16 b, float_status *status)
1887 return float16_addsub(a, b, status, true);
1890 static float32 QEMU_SOFTFLOAT_ATTR
1891 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1893 FloatParts64 pa, pb, *pr;
1895 float32_unpack_canonical(&pa, a, status);
1896 float32_unpack_canonical(&pb, b, status);
1897 pr = parts_addsub(&pa, &pb, status, subtract);
1899 return float32_round_pack_canonical(pr, status);
1902 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1904 return soft_f32_addsub(a, b, status, false);
1907 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1909 return soft_f32_addsub(a, b, status, true);
1912 static float64 QEMU_SOFTFLOAT_ATTR
1913 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1915 FloatParts64 pa, pb, *pr;
1917 float64_unpack_canonical(&pa, a, status);
1918 float64_unpack_canonical(&pb, b, status);
1919 pr = parts_addsub(&pa, &pb, status, subtract);
1921 return float64_round_pack_canonical(pr, status);
1924 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1926 return soft_f64_addsub(a, b, status, false);
1929 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1931 return soft_f64_addsub(a, b, status, true);
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1954 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1956 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1957 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1959 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1962 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1964 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1965 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1966 } else {
1967 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1971 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1972 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1974 return float32_gen2(a, b, s, hard, soft,
1975 f32_is_zon2, f32_addsubmul_post);
1978 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1979 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1981 return float64_gen2(a, b, s, hard, soft,
1982 f64_is_zon2, f64_addsubmul_post);
1985 float32 QEMU_FLATTEN
1986 float32_add(float32 a, float32 b, float_status *s)
1988 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1991 float32 QEMU_FLATTEN
1992 float32_sub(float32 a, float32 b, float_status *s)
1994 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1997 float64 QEMU_FLATTEN
1998 float64_add(float64 a, float64 b, float_status *s)
2000 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2003 float64 QEMU_FLATTEN
2004 float64_sub(float64 a, float64 b, float_status *s)
2006 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2009 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2010 bool subtract)
2012 FloatParts64 pa, pb, *pr;
2014 float64_unpack_canonical(&pa, a, status);
2015 float64_unpack_canonical(&pb, b, status);
2016 pr = parts_addsub(&pa, &pb, status, subtract);
2018 return float64r32_round_pack_canonical(pr, status);
2021 float64 float64r32_add(float64 a, float64 b, float_status *status)
2023 return float64r32_addsub(a, b, status, false);
2026 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2028 return float64r32_addsub(a, b, status, true);
2031 static bfloat16 QEMU_FLATTEN
2032 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2034 FloatParts64 pa, pb, *pr;
2036 bfloat16_unpack_canonical(&pa, a, status);
2037 bfloat16_unpack_canonical(&pb, b, status);
2038 pr = parts_addsub(&pa, &pb, status, subtract);
2040 return bfloat16_round_pack_canonical(pr, status);
2043 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2045 return bfloat16_addsub(a, b, status, false);
2048 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2050 return bfloat16_addsub(a, b, status, true);
2053 static float128 QEMU_FLATTEN
2054 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2056 FloatParts128 pa, pb, *pr;
2058 float128_unpack_canonical(&pa, a, status);
2059 float128_unpack_canonical(&pb, b, status);
2060 pr = parts_addsub(&pa, &pb, status, subtract);
2062 return float128_round_pack_canonical(pr, status);
2065 float128 float128_add(float128 a, float128 b, float_status *status)
2067 return float128_addsub(a, b, status, false);
2070 float128 float128_sub(float128 a, float128 b, float_status *status)
2072 return float128_addsub(a, b, status, true);
2075 static floatx80 QEMU_FLATTEN
2076 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2078 FloatParts128 pa, pb, *pr;
2080 if (!floatx80_unpack_canonical(&pa, a, status) ||
2081 !floatx80_unpack_canonical(&pb, b, status)) {
2082 return floatx80_default_nan(status);
2085 pr = parts_addsub(&pa, &pb, status, subtract);
2086 return floatx80_round_pack_canonical(pr, status);
2089 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2091 return floatx80_addsub(a, b, status, false);
2094 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2096 return floatx80_addsub(a, b, status, true);
/*
 * Multiplication
 */
2103 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2105 FloatParts64 pa, pb, *pr;
2107 float16_unpack_canonical(&pa, a, status);
2108 float16_unpack_canonical(&pb, b, status);
2109 pr = parts_mul(&pa, &pb, status);
2111 return float16_round_pack_canonical(pr, status);
2114 static float32 QEMU_SOFTFLOAT_ATTR
2115 soft_f32_mul(float32 a, float32 b, float_status *status)
2117 FloatParts64 pa, pb, *pr;
2119 float32_unpack_canonical(&pa, a, status);
2120 float32_unpack_canonical(&pb, b, status);
2121 pr = parts_mul(&pa, &pb, status);
2123 return float32_round_pack_canonical(pr, status);
2126 static float64 QEMU_SOFTFLOAT_ATTR
2127 soft_f64_mul(float64 a, float64 b, float_status *status)
2129 FloatParts64 pa, pb, *pr;
2131 float64_unpack_canonical(&pa, a, status);
2132 float64_unpack_canonical(&pb, b, status);
2133 pr = parts_mul(&pa, &pb, status);
2135 return float64_round_pack_canonical(pr, status);
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;

    return product;
}
/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;

    return product;
}
2148 float32 QEMU_FLATTEN
2149 float32_mul(float32 a, float32 b, float_status *s)
2151 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2152 f32_is_zon2, f32_addsubmul_post);
2155 float64 QEMU_FLATTEN
2156 float64_mul(float64 a, float64 b, float_status *s)
2158 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2159 f64_is_zon2, f64_addsubmul_post);
2162 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2164 FloatParts64 pa, pb, *pr;
2166 float64_unpack_canonical(&pa, a, status);
2167 float64_unpack_canonical(&pb, b, status);
2168 pr = parts_mul(&pa, &pb, status);
2170 return float64r32_round_pack_canonical(pr, status);
2173 bfloat16 QEMU_FLATTEN
2174 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2176 FloatParts64 pa, pb, *pr;
2178 bfloat16_unpack_canonical(&pa, a, status);
2179 bfloat16_unpack_canonical(&pb, b, status);
2180 pr = parts_mul(&pa, &pb, status);
2182 return bfloat16_round_pack_canonical(pr, status);
2185 float128 QEMU_FLATTEN
2186 float128_mul(float128 a, float128 b, float_status *status)
2188 FloatParts128 pa, pb, *pr;
2190 float128_unpack_canonical(&pa, a, status);
2191 float128_unpack_canonical(&pb, b, status);
2192 pr = parts_mul(&pa, &pb, status);
2194 return float128_round_pack_canonical(pr, status);
2197 floatx80 QEMU_FLATTEN
2198 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2200 FloatParts128 pa, pb, *pr;
2202 if (!floatx80_unpack_canonical(&pa, a, status) ||
2203 !floatx80_unpack_canonical(&pb, b, status)) {
2204 return floatx80_default_nan(status);
2207 pr = parts_mul(&pa, &pb, status);
2208 return floatx80_round_pack_canonical(pr, status);
/*
 * Fused multiply-add
 */
2215 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2216 int flags, float_status *status)
2218 FloatParts64 pa, pb, pc, *pr;
2220 float16_unpack_canonical(&pa, a, status);
2221 float16_unpack_canonical(&pb, b, status);
2222 float16_unpack_canonical(&pc, c, status);
2223 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2225 return float16_round_pack_canonical(pr, status);
2228 static float32 QEMU_SOFTFLOAT_ATTR
2229 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2230 float_status *status)
2232 FloatParts64 pa, pb, pc, *pr;
2234 float32_unpack_canonical(&pa, a, status);
2235 float32_unpack_canonical(&pb, b, status);
2236 float32_unpack_canonical(&pc, c, status);
2237 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2239 return float32_round_pack_canonical(pr, status);
2242 static float64 QEMU_SOFTFLOAT_ATTR
2243 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2244 float_status *status)
2246 FloatParts64 pa, pb, pc, *pr;
2248 float64_unpack_canonical(&pa, a, status);
2249 float64_unpack_canonical(&pb, b, status);
2250 float64_unpack_canonical(&pc, c, status);
2251 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2253 return float64_round_pack_canonical(pr, status);
2256 static bool force_soft_fma;
2258 float32 QEMU_FLATTEN
2259 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2261 union_float32 ua, ub, uc, ur;
2263 ua.s = xa;
2264 ub.s = xb;
2265 uc.s = xc;
2267 if (unlikely(!can_use_fpu(s))) {
2268 goto soft;
2270 if (unlikely(flags & float_muladd_halve_result)) {
2271 goto soft;
2274 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2275 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2276 goto soft;
2279 if (unlikely(force_soft_fma)) {
2280 goto soft;
2284 * When (a || b) == 0, there's no need to check for under/over flow,
2285 * since we know the addend is (normal || 0) and the product is 0.
2287 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2288 union_float32 up;
2289 bool prod_sign;
2291 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2292 prod_sign ^= !!(flags & float_muladd_negate_product);
2293 up.s = float32_set_sign(float32_zero, prod_sign);
2295 if (flags & float_muladd_negate_c) {
2296 uc.h = -uc.h;
2298 ur.h = up.h + uc.h;
2299 } else {
2300 union_float32 ua_orig = ua;
2301 union_float32 uc_orig = uc;
2303 if (flags & float_muladd_negate_product) {
2304 ua.h = -ua.h;
2306 if (flags & float_muladd_negate_c) {
2307 uc.h = -uc.h;
2310 ur.h = fmaf(ua.h, ub.h, uc.h);
2312 if (unlikely(f32_is_inf(ur))) {
2313 float_raise(float_flag_overflow, s);
2314 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2315 ua = ua_orig;
2316 uc = uc_orig;
2317 goto soft;
2320 if (flags & float_muladd_negate_result) {
2321 return float32_chs(ur.s);
2323 return ur.s;
2325 soft:
2326 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2329 float64 QEMU_FLATTEN
2330 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2332 union_float64 ua, ub, uc, ur;
2334 ua.s = xa;
2335 ub.s = xb;
2336 uc.s = xc;
2338 if (unlikely(!can_use_fpu(s))) {
2339 goto soft;
2341 if (unlikely(flags & float_muladd_halve_result)) {
2342 goto soft;
2345 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2346 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2347 goto soft;
2350 if (unlikely(force_soft_fma)) {
2351 goto soft;
2355 * When (a || b) == 0, there's no need to check for under/over flow,
2356 * since we know the addend is (normal || 0) and the product is 0.
2358 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2359 union_float64 up;
2360 bool prod_sign;
2362 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2363 prod_sign ^= !!(flags & float_muladd_negate_product);
2364 up.s = float64_set_sign(float64_zero, prod_sign);
2366 if (flags & float_muladd_negate_c) {
2367 uc.h = -uc.h;
2369 ur.h = up.h + uc.h;
2370 } else {
2371 union_float64 ua_orig = ua;
2372 union_float64 uc_orig = uc;
2374 if (flags & float_muladd_negate_product) {
2375 ua.h = -ua.h;
2377 if (flags & float_muladd_negate_c) {
2378 uc.h = -uc.h;
2381 ur.h = fma(ua.h, ub.h, uc.h);
2383 if (unlikely(f64_is_inf(ur))) {
2384 float_raise(float_flag_overflow, s);
2385 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2386 ua = ua_orig;
2387 uc = uc_orig;
2388 goto soft;
2391 if (flags & float_muladd_negate_result) {
2392 return float64_chs(ur.s);
2394 return ur.s;
2396 soft:
2397 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2400 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2401 int flags, float_status *status)
2403 FloatParts64 pa, pb, pc, *pr;
2405 float64_unpack_canonical(&pa, a, status);
2406 float64_unpack_canonical(&pb, b, status);
2407 float64_unpack_canonical(&pc, c, status);
2408 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2410 return float64r32_round_pack_canonical(pr, status);
2413 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2414 int flags, float_status *status)
2416 FloatParts64 pa, pb, pc, *pr;
2418 bfloat16_unpack_canonical(&pa, a, status);
2419 bfloat16_unpack_canonical(&pb, b, status);
2420 bfloat16_unpack_canonical(&pc, c, status);
2421 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2423 return bfloat16_round_pack_canonical(pr, status);
2426 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2427 int flags, float_status *status)
2429 FloatParts128 pa, pb, pc, *pr;
2431 float128_unpack_canonical(&pa, a, status);
2432 float128_unpack_canonical(&pb, b, status);
2433 float128_unpack_canonical(&pc, c, status);
2434 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2436 return float128_round_pack_canonical(pr, status);
/*
 * Division
 */
2443 float16 float16_div(float16 a, float16 b, float_status *status)
2445 FloatParts64 pa, pb, *pr;
2447 float16_unpack_canonical(&pa, a, status);
2448 float16_unpack_canonical(&pb, b, status);
2449 pr = parts_div(&pa, &pb, status);
2451 return float16_round_pack_canonical(pr, status);
2454 static float32 QEMU_SOFTFLOAT_ATTR
2455 soft_f32_div(float32 a, float32 b, float_status *status)
2457 FloatParts64 pa, pb, *pr;
2459 float32_unpack_canonical(&pa, a, status);
2460 float32_unpack_canonical(&pb, b, status);
2461 pr = parts_div(&pa, &pb, status);
2463 return float32_round_pack_canonical(pr, status);
2466 static float64 QEMU_SOFTFLOAT_ATTR
2467 soft_f64_div(float64 a, float64 b, float_status *status)
2469 FloatParts64 pa, pb, *pr;
2471 float64_unpack_canonical(&pa, a, status);
2472 float64_unpack_canonical(&pb, b, status);
2473 pr = parts_div(&pa, &pb, status);
2475 return float64_round_pack_canonical(pr, status);
/* Host single-precision division: returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
/* Host double-precision division: returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2488 static bool f32_div_pre(union_float32 a, union_float32 b)
2490 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2491 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2492 fpclassify(b.h) == FP_NORMAL;
2494 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2497 static bool f64_div_pre(union_float64 a, union_float64 b)
2499 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2500 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2501 fpclassify(b.h) == FP_NORMAL;
2503 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2506 static bool f32_div_post(union_float32 a, union_float32 b)
2508 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2509 return fpclassify(a.h) != FP_ZERO;
2511 return !float32_is_zero(a.s);
2514 static bool f64_div_post(union_float64 a, union_float64 b)
2516 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2517 return fpclassify(a.h) != FP_ZERO;
2519 return !float64_is_zero(a.s);
2522 float32 QEMU_FLATTEN
2523 float32_div(float32 a, float32 b, float_status *s)
2525 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2526 f32_div_pre, f32_div_post);
2529 float64 QEMU_FLATTEN
2530 float64_div(float64 a, float64 b, float_status *s)
2532 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2533 f64_div_pre, f64_div_post);
2536 float64 float64r32_div(float64 a, float64 b, float_status *status)
2538 FloatParts64 pa, pb, *pr;
2540 float64_unpack_canonical(&pa, a, status);
2541 float64_unpack_canonical(&pb, b, status);
2542 pr = parts_div(&pa, &pb, status);
2544 return float64r32_round_pack_canonical(pr, status);
2547 bfloat16 QEMU_FLATTEN
2548 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2550 FloatParts64 pa, pb, *pr;
2552 bfloat16_unpack_canonical(&pa, a, status);
2553 bfloat16_unpack_canonical(&pb, b, status);
2554 pr = parts_div(&pa, &pb, status);
2556 return bfloat16_round_pack_canonical(pr, status);
2559 float128 QEMU_FLATTEN
2560 float128_div(float128 a, float128 b, float_status *status)
2562 FloatParts128 pa, pb, *pr;
2564 float128_unpack_canonical(&pa, a, status);
2565 float128_unpack_canonical(&pb, b, status);
2566 pr = parts_div(&pa, &pb, status);
2568 return float128_round_pack_canonical(pr, status);
2571 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2573 FloatParts128 pa, pb, *pr;
2575 if (!floatx80_unpack_canonical(&pa, a, status) ||
2576 !floatx80_unpack_canonical(&pb, b, status)) {
2577 return floatx80_default_nan(status);
2580 pr = parts_div(&pa, &pb, status);
2581 return floatx80_round_pack_canonical(pr, status);
/*
 * Remainder
 */
2588 float32 float32_rem(float32 a, float32 b, float_status *status)
2590 FloatParts64 pa, pb, *pr;
2592 float32_unpack_canonical(&pa, a, status);
2593 float32_unpack_canonical(&pb, b, status);
2594 pr = parts_modrem(&pa, &pb, NULL, status);
2596 return float32_round_pack_canonical(pr, status);
2599 float64 float64_rem(float64 a, float64 b, float_status *status)
2601 FloatParts64 pa, pb, *pr;
2603 float64_unpack_canonical(&pa, a, status);
2604 float64_unpack_canonical(&pb, b, status);
2605 pr = parts_modrem(&pa, &pb, NULL, status);
2607 return float64_round_pack_canonical(pr, status);
2610 float128 float128_rem(float128 a, float128 b, float_status *status)
2612 FloatParts128 pa, pb, *pr;
2614 float128_unpack_canonical(&pa, a, status);
2615 float128_unpack_canonical(&pb, b, status);
2616 pr = parts_modrem(&pa, &pb, NULL, status);
2618 return float128_round_pack_canonical(pr, status);
2622 * Returns the remainder of the extended double-precision floating-point value
2623 * `a' with respect to the corresponding value `b'.
2624 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2625 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2626 * the remainder based on truncating the quotient toward zero instead and
2627 * *quotient is set to the low 64 bits of the absolute value of the integer
2628 * quotient.
2630 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2631 uint64_t *quotient, float_status *status)
2633 FloatParts128 pa, pb, *pr;
2635 *quotient = 0;
2636 if (!floatx80_unpack_canonical(&pa, a, status) ||
2637 !floatx80_unpack_canonical(&pb, b, status)) {
2638 return floatx80_default_nan(status);
2640 pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2642 return floatx80_round_pack_canonical(pr, status);
2645 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2647 uint64_t quotient;
2648 return floatx80_modrem(a, b, false, &quotient, status);
2651 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2653 uint64_t quotient;
2654 return floatx80_modrem(a, b, true, &quotient, status);
/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */
2668 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2670 switch (a->cls) {
2671 case float_class_snan:
2672 float_raise(float_flag_invalid_snan, s);
2673 /* fall through */
2674 case float_class_qnan:
2676 * There is no NaN in the destination format. Raise Invalid
2677 * and return a zero with the sign of the input NaN.
2679 float_raise(float_flag_invalid, s);
2680 a->cls = float_class_zero;
2681 break;
2683 case float_class_inf:
2685 * There is no Inf in the destination format. Raise Invalid
2686 * and return the maximum normal with the correct sign.
2688 float_raise(float_flag_invalid, s);
2689 a->cls = float_class_normal;
2690 a->exp = float16_params_ahp.exp_max;
2691 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2692 float16_params_ahp.frac_size + 1);
2693 break;
2695 case float_class_normal:
2696 case float_class_zero:
2697 break;
2699 default:
2700 g_assert_not_reached();
2704 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2706 if (is_nan(a->cls)) {
2707 parts_return_nan(a, s);
2711 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2713 if (is_nan(a->cls)) {
2714 parts_return_nan(a, s);
/*
 * Generic front end for the float_to_float helpers; the concrete variant
 * is chosen by PARTS_GENERIC_64_128 based on P (presumably
 * parts64_float_to_float or parts128_float_to_float - confirm against
 * the macro definition).
 */
#define parts_float_to_float(P, S)                      \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2721 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2722 float_status *s)
2724 a->cls = b->cls;
2725 a->sign = b->sign;
2726 a->exp = b->exp;
2728 if (a->cls == float_class_normal) {
2729 frac_truncjam(a, b);
2730 } else if (is_nan(a->cls)) {
2731 /* Discard the low bits of the NaN. */
2732 a->frac = b->frac_hi;
2733 parts_return_nan(a, s);
2737 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2738 float_status *s)
2740 a->cls = b->cls;
2741 a->sign = b->sign;
2742 a->exp = b->exp;
2743 frac_widen(a, b);
2745 if (is_nan(a->cls)) {
2746 parts_return_nan(a, s);
2750 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2752 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2753 FloatParts64 p;
2755 float16a_unpack_canonical(&p, a, s, fmt16);
2756 parts_float_to_float(&p, s);
2757 return float32_round_pack_canonical(&p, s);
2760 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2762 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2763 FloatParts64 p;
2765 float16a_unpack_canonical(&p, a, s, fmt16);
2766 parts_float_to_float(&p, s);
2767 return float64_round_pack_canonical(&p, s);
2770 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2772 FloatParts64 p;
2773 const FloatFmt *fmt;
2775 float32_unpack_canonical(&p, a, s);
2776 if (ieee) {
2777 parts_float_to_float(&p, s);
2778 fmt = &float16_params;
2779 } else {
2780 parts_float_to_ahp(&p, s);
2781 fmt = &float16_params_ahp;
2783 return float16a_round_pack_canonical(&p, s, fmt);
2786 static float64 QEMU_SOFTFLOAT_ATTR
2787 soft_float32_to_float64(float32 a, float_status *s)
2789 FloatParts64 p;
2791 float32_unpack_canonical(&p, a, s);
2792 parts_float_to_float(&p, s);
2793 return float64_round_pack_canonical(&p, s);
2796 float64 float32_to_float64(float32 a, float_status *s)
2798 if (likely(float32_is_normal(a))) {
2799 /* Widening conversion can never produce inexact results. */
2800 union_float32 uf;
2801 union_float64 ud;
2802 uf.s = a;
2803 ud.h = uf.h;
2804 return ud.s;
2805 } else if (float32_is_zero(a)) {
2806 return float64_set_sign(float64_zero, float32_is_neg(a));
2807 } else {
2808 return soft_float32_to_float64(a, s);
2812 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2814 FloatParts64 p;
2815 const FloatFmt *fmt;
2817 float64_unpack_canonical(&p, a, s);
2818 if (ieee) {
2819 parts_float_to_float(&p, s);
2820 fmt = &float16_params;
2821 } else {
2822 parts_float_to_ahp(&p, s);
2823 fmt = &float16_params_ahp;
2825 return float16a_round_pack_canonical(&p, s, fmt);
2828 float32 float64_to_float32(float64 a, float_status *s)
2830 FloatParts64 p;
2832 float64_unpack_canonical(&p, a, s);
2833 parts_float_to_float(&p, s);
2834 return float32_round_pack_canonical(&p, s);
2837 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2839 FloatParts64 p;
2841 bfloat16_unpack_canonical(&p, a, s);
2842 parts_float_to_float(&p, s);
2843 return float32_round_pack_canonical(&p, s);
2846 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2848 FloatParts64 p;
2850 bfloat16_unpack_canonical(&p, a, s);
2851 parts_float_to_float(&p, s);
2852 return float64_round_pack_canonical(&p, s);
2855 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2857 FloatParts64 p;
2859 float32_unpack_canonical(&p, a, s);
2860 parts_float_to_float(&p, s);
2861 return bfloat16_round_pack_canonical(&p, s);
2864 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2866 FloatParts64 p;
2868 float64_unpack_canonical(&p, a, s);
2869 parts_float_to_float(&p, s);
2870 return bfloat16_round_pack_canonical(&p, s);
2873 float32 float128_to_float32(float128 a, float_status *s)
2875 FloatParts64 p64;
2876 FloatParts128 p128;
2878 float128_unpack_canonical(&p128, a, s);
2879 parts_float_to_float_narrow(&p64, &p128, s);
2880 return float32_round_pack_canonical(&p64, s);
2883 float64 float128_to_float64(float128 a, float_status *s)
2885 FloatParts64 p64;
2886 FloatParts128 p128;
2888 float128_unpack_canonical(&p128, a, s);
2889 parts_float_to_float_narrow(&p64, &p128, s);
2890 return float64_round_pack_canonical(&p64, s);
2893 float128 float32_to_float128(float32 a, float_status *s)
2895 FloatParts64 p64;
2896 FloatParts128 p128;
2898 float32_unpack_canonical(&p64, a, s);
2899 parts_float_to_float_widen(&p128, &p64, s);
2900 return float128_round_pack_canonical(&p128, s);
2903 float128 float64_to_float128(float64 a, float_status *s)
2905 FloatParts64 p64;
2906 FloatParts128 p128;
2908 float64_unpack_canonical(&p64, a, s);
2909 parts_float_to_float_widen(&p128, &p64, s);
2910 return float128_round_pack_canonical(&p128, s);
2913 float32 floatx80_to_float32(floatx80 a, float_status *s)
2915 FloatParts64 p64;
2916 FloatParts128 p128;
2918 if (floatx80_unpack_canonical(&p128, a, s)) {
2919 parts_float_to_float_narrow(&p64, &p128, s);
2920 } else {
2921 parts_default_nan(&p64, s);
2923 return float32_round_pack_canonical(&p64, s);
2926 float64 floatx80_to_float64(floatx80 a, float_status *s)
2928 FloatParts64 p64;
2929 FloatParts128 p128;
2931 if (floatx80_unpack_canonical(&p128, a, s)) {
2932 parts_float_to_float_narrow(&p64, &p128, s);
2933 } else {
2934 parts_default_nan(&p64, s);
2936 return float64_round_pack_canonical(&p64, s);
2939 float128 floatx80_to_float128(floatx80 a, float_status *s)
2941 FloatParts128 p;
2943 if (floatx80_unpack_canonical(&p, a, s)) {
2944 parts_float_to_float(&p, s);
2945 } else {
2946 parts_default_nan(&p, s);
2948 return float128_round_pack_canonical(&p, s);
2951 floatx80 float32_to_floatx80(float32 a, float_status *s)
2953 FloatParts64 p64;
2954 FloatParts128 p128;
2956 float32_unpack_canonical(&p64, a, s);
2957 parts_float_to_float_widen(&p128, &p64, s);
2958 return floatx80_round_pack_canonical(&p128, s);
2961 floatx80 float64_to_floatx80(float64 a, float_status *s)
2963 FloatParts64 p64;
2964 FloatParts128 p128;
2966 float64_unpack_canonical(&p64, a, s);
2967 parts_float_to_float_widen(&p128, &p64, s);
2968 return floatx80_round_pack_canonical(&p128, s);
2971 floatx80 float128_to_floatx80(float128 a, float_status *s)
2973 FloatParts128 p;
2975 float128_unpack_canonical(&p, a, s);
2976 parts_float_to_float(&p, s);
2977 return floatx80_round_pack_canonical(&p, s);
/*
 * Round to integral value
 */
2984 float16 float16_round_to_int(float16 a, float_status *s)
2986 FloatParts64 p;
2988 float16_unpack_canonical(&p, a, s);
2989 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2990 return float16_round_pack_canonical(&p, s);
2993 float32 float32_round_to_int(float32 a, float_status *s)
2995 FloatParts64 p;
2997 float32_unpack_canonical(&p, a, s);
2998 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2999 return float32_round_pack_canonical(&p, s);
3002 float64 float64_round_to_int(float64 a, float_status *s)
3004 FloatParts64 p;
3006 float64_unpack_canonical(&p, a, s);
3007 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3008 return float64_round_pack_canonical(&p, s);
3011 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3013 FloatParts64 p;
3015 bfloat16_unpack_canonical(&p, a, s);
3016 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3017 return bfloat16_round_pack_canonical(&p, s);
3020 float128 float128_round_to_int(float128 a, float_status *s)
3022 FloatParts128 p;
3024 float128_unpack_canonical(&p, a, s);
3025 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3026 return float128_round_pack_canonical(&p, s);
3029 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3031 FloatParts128 p;
3033 if (!floatx80_unpack_canonical(&p, a, status)) {
3034 return floatx80_default_nan(status);
3037 parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3038 &floatx80_params[status->floatx80_rounding_precision]);
3039 return floatx80_round_pack_canonical(&p, status);
/*
 * Floating-point to signed integer conversions
 */
3046 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3047 float_status *s)
3049 FloatParts64 p;
3051 float16_unpack_canonical(&p, a, s);
3052 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3055 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3056 float_status *s)
3058 FloatParts64 p;
3060 float16_unpack_canonical(&p, a, s);
3061 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3064 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3065 float_status *s)
3067 FloatParts64 p;
3069 float16_unpack_canonical(&p, a, s);
3070 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3073 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3074 float_status *s)
3076 FloatParts64 p;
3078 float16_unpack_canonical(&p, a, s);
3079 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3082 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3083 float_status *s)
3085 FloatParts64 p;
3087 float32_unpack_canonical(&p, a, s);
3088 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3091 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3092 float_status *s)
3094 FloatParts64 p;
3096 float32_unpack_canonical(&p, a, s);
3097 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3100 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3101 float_status *s)
3103 FloatParts64 p;
3105 float32_unpack_canonical(&p, a, s);
3106 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3109 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3110 float_status *s)
3112 FloatParts64 p;
3114 float64_unpack_canonical(&p, a, s);
3115 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3118 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3119 float_status *s)
3121 FloatParts64 p;
3123 float64_unpack_canonical(&p, a, s);
3124 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3127 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3128 float_status *s)
3130 FloatParts64 p;
3132 float64_unpack_canonical(&p, a, s);
3133 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3136 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3137 float_status *s)
3139 FloatParts64 p;
3141 bfloat16_unpack_canonical(&p, a, s);
3142 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3145 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3146 float_status *s)
3148 FloatParts64 p;
3150 bfloat16_unpack_canonical(&p, a, s);
3151 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3154 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3155 float_status *s)
3157 FloatParts64 p;
3159 bfloat16_unpack_canonical(&p, a, s);
3160 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3163 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3164 float_status *s)
3166 FloatParts64 p;
3168 bfloat16_unpack_canonical(&p, a, s);
3169 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3172 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3173 int scale, float_status *s)
3175 FloatParts128 p;
3177 float128_unpack_canonical(&p, a, s);
3178 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3181 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3182 int scale, float_status *s)
3184 FloatParts128 p;
3186 float128_unpack_canonical(&p, a, s);
3187 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3190 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3191 int scale, float_status *s)
3193 int flags = 0;
3194 Int128 r;
3195 FloatParts128 p;
3197 float128_unpack_canonical(&p, a, s);
3199 switch (p.cls) {
3200 case float_class_snan:
3201 flags |= float_flag_invalid_snan;
3202 /* fall through */
3203 case float_class_qnan:
3204 flags |= float_flag_invalid;
3205 r = UINT128_MAX;
3206 break;
3208 case float_class_inf:
3209 flags = float_flag_invalid | float_flag_invalid_cvti;
3210 r = p.sign ? INT128_MIN : INT128_MAX;
3211 break;
3213 case float_class_zero:
3214 return int128_zero();
3216 case float_class_normal:
3217 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3218 flags = float_flag_inexact;
3221 if (p.exp < 127) {
3222 int shift = 127 - p.exp;
3223 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3224 if (p.sign) {
3225 r = int128_neg(r);
3227 } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3228 p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3229 r = INT128_MIN;
3230 } else {
3231 flags = float_flag_invalid | float_flag_invalid_cvti;
3232 r = p.sign ? INT128_MIN : INT128_MAX;
3234 break;
3236 default:
3237 g_assert_not_reached();
3240 float_raise(flags, s);
3241 return r;
3244 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3245 int scale, float_status *s)
3247 FloatParts128 p;
3249 if (!floatx80_unpack_canonical(&p, a, s)) {
3250 parts_default_nan(&p, s);
3252 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3255 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3256 int scale, float_status *s)
3258 FloatParts128 p;
3260 if (!floatx80_unpack_canonical(&p, a, s)) {
3261 parts_default_nan(&p, s);
3263 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3266 int8_t float16_to_int8(float16 a, float_status *s)
3268 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3271 int16_t float16_to_int16(float16 a, float_status *s)
3273 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3276 int32_t float16_to_int32(float16 a, float_status *s)
3278 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3281 int64_t float16_to_int64(float16 a, float_status *s)
3283 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3286 int16_t float32_to_int16(float32 a, float_status *s)
3288 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3291 int32_t float32_to_int32(float32 a, float_status *s)
3293 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3296 int64_t float32_to_int64(float32 a, float_status *s)
3298 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3301 int16_t float64_to_int16(float64 a, float_status *s)
3303 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3306 int32_t float64_to_int32(float64 a, float_status *s)
3308 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3311 int64_t float64_to_int64(float64 a, float_status *s)
3313 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3316 int32_t float128_to_int32(float128 a, float_status *s)
3318 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3321 int64_t float128_to_int64(float128 a, float_status *s)
3323 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3326 Int128 float128_to_int128(float128 a, float_status *s)
3328 return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3331 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3333 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3336 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3338 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3341 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3343 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3346 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3348 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3351 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3353 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3356 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3358 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3361 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3363 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3366 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3368 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3371 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3373 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3376 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3378 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3381 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3383 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3386 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3388 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3391 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3393 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3396 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3398 return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3401 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3403 return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3406 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3408 return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3411 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3413 return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3416 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3418 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3421 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3423 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3426 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3428 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3431 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3433 return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3436 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3438 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3441 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3443 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3446 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3448 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3451 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3452 float_status *s)
3454 FloatParts64 p;
3456 float64_unpack_canonical(&p, a, s);
3457 return parts_float_to_sint_modulo(&p, rmode, 31, s);
3460 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3461 float_status *s)
3463 FloatParts64 p;
3465 float64_unpack_canonical(&p, a, s);
3466 return parts_float_to_sint_modulo(&p, rmode, 63, s);
/*
 * Floating-point to unsigned integer conversions
 */
3473 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3474 float_status *s)
3476 FloatParts64 p;
3478 float16_unpack_canonical(&p, a, s);
3479 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3482 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3483 float_status *s)
3485 FloatParts64 p;
3487 float16_unpack_canonical(&p, a, s);
3488 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3491 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3492 float_status *s)
3494 FloatParts64 p;
3496 float16_unpack_canonical(&p, a, s);
3497 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3500 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3501 float_status *s)
3503 FloatParts64 p;
3505 float16_unpack_canonical(&p, a, s);
3506 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3509 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3510 float_status *s)
3512 FloatParts64 p;
3514 float32_unpack_canonical(&p, a, s);
3515 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3518 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3519 float_status *s)
3521 FloatParts64 p;
3523 float32_unpack_canonical(&p, a, s);
3524 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3527 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3528 float_status *s)
3530 FloatParts64 p;
3532 float32_unpack_canonical(&p, a, s);
3533 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3536 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3537 float_status *s)
3539 FloatParts64 p;
3541 float64_unpack_canonical(&p, a, s);
3542 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3545 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3546 float_status *s)
3548 FloatParts64 p;
3550 float64_unpack_canonical(&p, a, s);
3551 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3554 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3555 float_status *s)
3557 FloatParts64 p;
3559 float64_unpack_canonical(&p, a, s);
3560 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3563 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3564 int scale, float_status *s)
3566 FloatParts64 p;
3568 bfloat16_unpack_canonical(&p, a, s);
3569 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3572 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3573 int scale, float_status *s)
3575 FloatParts64 p;
3577 bfloat16_unpack_canonical(&p, a, s);
3578 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3581 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3582 int scale, float_status *s)
3584 FloatParts64 p;
3586 bfloat16_unpack_canonical(&p, a, s);
3587 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3590 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3591 int scale, float_status *s)
3593 FloatParts64 p;
3595 bfloat16_unpack_canonical(&p, a, s);
3596 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3599 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3600 int scale, float_status *s)
3602 FloatParts128 p;
3604 float128_unpack_canonical(&p, a, s);
3605 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3608 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3609 int scale, float_status *s)
3611 FloatParts128 p;
3613 float128_unpack_canonical(&p, a, s);
3614 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3617 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3618 int scale, float_status *s)
3620 int flags = 0;
3621 Int128 r;
3622 FloatParts128 p;
3624 float128_unpack_canonical(&p, a, s);
3626 switch (p.cls) {
3627 case float_class_snan:
3628 flags |= float_flag_invalid_snan;
3629 /* fall through */
3630 case float_class_qnan:
3631 flags |= float_flag_invalid;
3632 r = UINT128_MAX;
3633 break;
3635 case float_class_inf:
3636 flags = float_flag_invalid | float_flag_invalid_cvti;
3637 r = p.sign ? int128_zero() : UINT128_MAX;
3638 break;
3640 case float_class_zero:
3641 return int128_zero();
3643 case float_class_normal:
3644 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3645 flags = float_flag_inexact;
3646 if (p.cls == float_class_zero) {
3647 r = int128_zero();
3648 break;
3652 if (p.sign) {
3653 flags = float_flag_invalid | float_flag_invalid_cvti;
3654 r = int128_zero();
3655 } else if (p.exp <= 127) {
3656 int shift = 127 - p.exp;
3657 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3658 } else {
3659 flags = float_flag_invalid | float_flag_invalid_cvti;
3660 r = UINT128_MAX;
3662 break;
3664 default:
3665 g_assert_not_reached();
3668 float_raise(flags, s);
3669 return r;
3672 uint8_t float16_to_uint8(float16 a, float_status *s)
3674 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3677 uint16_t float16_to_uint16(float16 a, float_status *s)
3679 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3682 uint32_t float16_to_uint32(float16 a, float_status *s)
3684 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3687 uint64_t float16_to_uint64(float16 a, float_status *s)
3689 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3692 uint16_t float32_to_uint16(float32 a, float_status *s)
3694 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3697 uint32_t float32_to_uint32(float32 a, float_status *s)
3699 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3702 uint64_t float32_to_uint64(float32 a, float_status *s)
3704 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3707 uint16_t float64_to_uint16(float64 a, float_status *s)
3709 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3712 uint32_t float64_to_uint32(float64 a, float_status *s)
3714 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3717 uint64_t float64_to_uint64(float64 a, float_status *s)
3719 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3722 uint32_t float128_to_uint32(float128 a, float_status *s)
3724 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3727 uint64_t float128_to_uint64(float128 a, float_status *s)
3729 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3732 Int128 float128_to_uint128(float128 a, float_status *s)
3734 return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3737 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3739 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3742 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3744 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3747 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3749 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3752 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3754 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3757 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3759 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3762 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3764 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3767 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3769 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3772 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3774 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3777 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3779 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3782 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3784 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3787 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3789 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3792 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3794 return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3797 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3799 return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3802 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3804 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3807 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3809 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3812 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3814 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3817 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3819 return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3822 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3824 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3827 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3829 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3832 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3834 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
/*
 * Signed integer to floating-point conversions
 */
3841 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3843 FloatParts64 p;
3845 parts_sint_to_float(&p, a, scale, status);
3846 return float16_round_pack_canonical(&p, status);
3849 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3851 return int64_to_float16_scalbn(a, scale, status);
3854 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3856 return int64_to_float16_scalbn(a, scale, status);
3859 float16 int64_to_float16(int64_t a, float_status *status)
3861 return int64_to_float16_scalbn(a, 0, status);
3864 float16 int32_to_float16(int32_t a, float_status *status)
3866 return int64_to_float16_scalbn(a, 0, status);
3869 float16 int16_to_float16(int16_t a, float_status *status)
3871 return int64_to_float16_scalbn(a, 0, status);
3874 float16 int8_to_float16(int8_t a, float_status *status)
3876 return int64_to_float16_scalbn(a, 0, status);
3879 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3881 FloatParts64 p;
3883 /* Without scaling, there are no overflow concerns. */
3884 if (likely(scale == 0) && can_use_fpu(status)) {
3885 union_float32 ur;
3886 ur.h = a;
3887 return ur.s;
3890 parts64_sint_to_float(&p, a, scale, status);
3891 return float32_round_pack_canonical(&p, status);
3894 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3896 return int64_to_float32_scalbn(a, scale, status);
3899 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3901 return int64_to_float32_scalbn(a, scale, status);
3904 float32 int64_to_float32(int64_t a, float_status *status)
3906 return int64_to_float32_scalbn(a, 0, status);
3909 float32 int32_to_float32(int32_t a, float_status *status)
3911 return int64_to_float32_scalbn(a, 0, status);
3914 float32 int16_to_float32(int16_t a, float_status *status)
3916 return int64_to_float32_scalbn(a, 0, status);
3919 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3921 FloatParts64 p;
3923 /* Without scaling, there are no overflow concerns. */
3924 if (likely(scale == 0) && can_use_fpu(status)) {
3925 union_float64 ur;
3926 ur.h = a;
3927 return ur.s;
3930 parts_sint_to_float(&p, a, scale, status);
3931 return float64_round_pack_canonical(&p, status);
3934 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3936 return int64_to_float64_scalbn(a, scale, status);
3939 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3941 return int64_to_float64_scalbn(a, scale, status);
3944 float64 int64_to_float64(int64_t a, float_status *status)
3946 return int64_to_float64_scalbn(a, 0, status);
3949 float64 int32_to_float64(int32_t a, float_status *status)
3951 return int64_to_float64_scalbn(a, 0, status);
3954 float64 int16_to_float64(int16_t a, float_status *status)
3956 return int64_to_float64_scalbn(a, 0, status);
3959 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3961 FloatParts64 p;
3963 parts_sint_to_float(&p, a, scale, status);
3964 return bfloat16_round_pack_canonical(&p, status);
3967 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3969 return int64_to_bfloat16_scalbn(a, scale, status);
3972 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3974 return int64_to_bfloat16_scalbn(a, scale, status);
3977 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
3979 return int64_to_bfloat16_scalbn(a, scale, status);
3982 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3984 return int64_to_bfloat16_scalbn(a, 0, status);
3987 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3989 return int64_to_bfloat16_scalbn(a, 0, status);
3992 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3994 return int64_to_bfloat16_scalbn(a, 0, status);
3997 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
3999 return int64_to_bfloat16_scalbn(a, 0, status);
4002 float128 int128_to_float128(Int128 a, float_status *status)
4004 FloatParts128 p = { };
4005 int shift;
4007 if (int128_nz(a)) {
4008 p.cls = float_class_normal;
4009 if (!int128_nonneg(a)) {
4010 p.sign = true;
4011 a = int128_neg(a);
4014 shift = clz64(int128_gethi(a));
4015 if (shift == 64) {
4016 shift += clz64(int128_getlo(a));
4019 p.exp = 127 - shift;
4020 a = int128_lshift(a, shift);
4022 p.frac_hi = int128_gethi(a);
4023 p.frac_lo = int128_getlo(a);
4024 } else {
4025 p.cls = float_class_zero;
4028 return float128_round_pack_canonical(&p, status);
4031 float128 int64_to_float128(int64_t a, float_status *status)
4033 FloatParts128 p;
4035 parts_sint_to_float(&p, a, 0, status);
4036 return float128_round_pack_canonical(&p, status);
4039 float128 int32_to_float128(int32_t a, float_status *status)
4041 return int64_to_float128(a, status);
4044 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4046 FloatParts128 p;
4048 parts_sint_to_float(&p, a, 0, status);
4049 return floatx80_round_pack_canonical(&p, status);
4052 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4054 return int64_to_floatx80(a, status);
/*
 * Unsigned Integer to floating-point conversions
 */
4061 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4063 FloatParts64 p;
4065 parts_uint_to_float(&p, a, scale, status);
4066 return float16_round_pack_canonical(&p, status);
4069 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4071 return uint64_to_float16_scalbn(a, scale, status);
4074 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4076 return uint64_to_float16_scalbn(a, scale, status);
4079 float16 uint64_to_float16(uint64_t a, float_status *status)
4081 return uint64_to_float16_scalbn(a, 0, status);
4084 float16 uint32_to_float16(uint32_t a, float_status *status)
4086 return uint64_to_float16_scalbn(a, 0, status);
4089 float16 uint16_to_float16(uint16_t a, float_status *status)
4091 return uint64_to_float16_scalbn(a, 0, status);
4094 float16 uint8_to_float16(uint8_t a, float_status *status)
4096 return uint64_to_float16_scalbn(a, 0, status);
4099 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4101 FloatParts64 p;
4103 /* Without scaling, there are no overflow concerns. */
4104 if (likely(scale == 0) && can_use_fpu(status)) {
4105 union_float32 ur;
4106 ur.h = a;
4107 return ur.s;
4110 parts_uint_to_float(&p, a, scale, status);
4111 return float32_round_pack_canonical(&p, status);
4114 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4116 return uint64_to_float32_scalbn(a, scale, status);
4119 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4121 return uint64_to_float32_scalbn(a, scale, status);
4124 float32 uint64_to_float32(uint64_t a, float_status *status)
4126 return uint64_to_float32_scalbn(a, 0, status);
4129 float32 uint32_to_float32(uint32_t a, float_status *status)
4131 return uint64_to_float32_scalbn(a, 0, status);
4134 float32 uint16_to_float32(uint16_t a, float_status *status)
4136 return uint64_to_float32_scalbn(a, 0, status);
4139 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4141 FloatParts64 p;
4143 /* Without scaling, there are no overflow concerns. */
4144 if (likely(scale == 0) && can_use_fpu(status)) {
4145 union_float64 ur;
4146 ur.h = a;
4147 return ur.s;
4150 parts_uint_to_float(&p, a, scale, status);
4151 return float64_round_pack_canonical(&p, status);
4154 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4156 return uint64_to_float64_scalbn(a, scale, status);
4159 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4161 return uint64_to_float64_scalbn(a, scale, status);
4164 float64 uint64_to_float64(uint64_t a, float_status *status)
4166 return uint64_to_float64_scalbn(a, 0, status);
4169 float64 uint32_to_float64(uint32_t a, float_status *status)
4171 return uint64_to_float64_scalbn(a, 0, status);
4174 float64 uint16_to_float64(uint16_t a, float_status *status)
4176 return uint64_to_float64_scalbn(a, 0, status);
4179 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4181 FloatParts64 p;
4183 parts_uint_to_float(&p, a, scale, status);
4184 return bfloat16_round_pack_canonical(&p, status);
4187 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4189 return uint64_to_bfloat16_scalbn(a, scale, status);
4192 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4194 return uint64_to_bfloat16_scalbn(a, scale, status);
4197 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4199 return uint64_to_bfloat16_scalbn(a, scale, status);
4202 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4204 return uint64_to_bfloat16_scalbn(a, 0, status);
4207 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4209 return uint64_to_bfloat16_scalbn(a, 0, status);
4212 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4214 return uint64_to_bfloat16_scalbn(a, 0, status);
4217 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4219 return uint64_to_bfloat16_scalbn(a, 0, status);
4222 float128 uint64_to_float128(uint64_t a, float_status *status)
4224 FloatParts128 p;
4226 parts_uint_to_float(&p, a, 0, status);
4227 return float128_round_pack_canonical(&p, status);
4230 float128 uint128_to_float128(Int128 a, float_status *status)
4232 FloatParts128 p = { };
4233 int shift;
4235 if (int128_nz(a)) {
4236 p.cls = float_class_normal;
4238 shift = clz64(int128_gethi(a));
4239 if (shift == 64) {
4240 shift += clz64(int128_getlo(a));
4243 p.exp = 127 - shift;
4244 a = int128_lshift(a, shift);
4246 p.frac_hi = int128_gethi(a);
4247 p.frac_lo = int128_getlo(a);
4248 } else {
4249 p.cls = float_class_zero;
4252 return float128_round_pack_canonical(&p, status);
/*
 * Minimum and maximum
 */
4259 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4261 FloatParts64 pa, pb, *pr;
4263 float16_unpack_canonical(&pa, a, s);
4264 float16_unpack_canonical(&pb, b, s);
4265 pr = parts_minmax(&pa, &pb, s, flags);
4267 return float16_round_pack_canonical(pr, s);
4270 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4271 float_status *s, int flags)
4273 FloatParts64 pa, pb, *pr;
4275 bfloat16_unpack_canonical(&pa, a, s);
4276 bfloat16_unpack_canonical(&pb, b, s);
4277 pr = parts_minmax(&pa, &pb, s, flags);
4279 return bfloat16_round_pack_canonical(pr, s);
4282 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4284 FloatParts64 pa, pb, *pr;
4286 float32_unpack_canonical(&pa, a, s);
4287 float32_unpack_canonical(&pb, b, s);
4288 pr = parts_minmax(&pa, &pb, s, flags);
4290 return float32_round_pack_canonical(pr, s);
4293 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4295 FloatParts64 pa, pb, *pr;
4297 float64_unpack_canonical(&pa, a, s);
4298 float64_unpack_canonical(&pb, b, s);
4299 pr = parts_minmax(&pa, &pb, s, flags);
4301 return float64_round_pack_canonical(pr, s);
4304 static float128 float128_minmax(float128 a, float128 b,
4305 float_status *s, int flags)
4307 FloatParts128 pa, pb, *pr;
4309 float128_unpack_canonical(&pa, a, s);
4310 float128_unpack_canonical(&pb, b, s);
4311 pr = parts_minmax(&pa, &pb, s, flags);
4313 return float128_round_pack_canonical(pr, s);
4316 #define MINMAX_1(type, name, flags) \
4317 type type##_##name(type a, type b, float_status *s) \
4318 { return type##_minmax(a, b, s, flags); }
4320 #define MINMAX_2(type) \
4321 MINMAX_1(type, max, 0) \
4322 MINMAX_1(type, maxnum, minmax_isnum) \
4323 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
4324 MINMAX_1(type, maximum_number, minmax_isnumber) \
4325 MINMAX_1(type, min, minmax_ismin) \
4326 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
4327 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4328 MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \
4330 MINMAX_2(float16)
4331 MINMAX_2(bfloat16)
4332 MINMAX_2(float32)
4333 MINMAX_2(float64)
4334 MINMAX_2(float128)
4336 #undef MINMAX_1
4337 #undef MINMAX_2
/*
 * Floating point compare
 */
4343 static FloatRelation QEMU_FLATTEN
4344 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4346 FloatParts64 pa, pb;
4348 float16_unpack_canonical(&pa, a, s);
4349 float16_unpack_canonical(&pb, b, s);
4350 return parts_compare(&pa, &pb, s, is_quiet);
4353 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4355 return float16_do_compare(a, b, s, false);
4358 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4360 return float16_do_compare(a, b, s, true);
4363 static FloatRelation QEMU_SOFTFLOAT_ATTR
4364 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4366 FloatParts64 pa, pb;
4368 float32_unpack_canonical(&pa, a, s);
4369 float32_unpack_canonical(&pb, b, s);
4370 return parts_compare(&pa, &pb, s, is_quiet);
4373 static FloatRelation QEMU_FLATTEN
4374 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4376 union_float32 ua, ub;
4378 ua.s = xa;
4379 ub.s = xb;
4381 if (QEMU_NO_HARDFLOAT) {
4382 goto soft;
4385 float32_input_flush2(&ua.s, &ub.s, s);
4386 if (isgreaterequal(ua.h, ub.h)) {
4387 if (isgreater(ua.h, ub.h)) {
4388 return float_relation_greater;
4390 return float_relation_equal;
4392 if (likely(isless(ua.h, ub.h))) {
4393 return float_relation_less;
4396 * The only condition remaining is unordered.
4397 * Fall through to set flags.
4399 soft:
4400 return float32_do_compare(ua.s, ub.s, s, is_quiet);
4403 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4405 return float32_hs_compare(a, b, s, false);
4408 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4410 return float32_hs_compare(a, b, s, true);
4413 static FloatRelation QEMU_SOFTFLOAT_ATTR
4414 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4416 FloatParts64 pa, pb;
4418 float64_unpack_canonical(&pa, a, s);
4419 float64_unpack_canonical(&pb, b, s);
4420 return parts_compare(&pa, &pb, s, is_quiet);
4423 static FloatRelation QEMU_FLATTEN
4424 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4426 union_float64 ua, ub;
4428 ua.s = xa;
4429 ub.s = xb;
4431 if (QEMU_NO_HARDFLOAT) {
4432 goto soft;
4435 float64_input_flush2(&ua.s, &ub.s, s);
4436 if (isgreaterequal(ua.h, ub.h)) {
4437 if (isgreater(ua.h, ub.h)) {
4438 return float_relation_greater;
4440 return float_relation_equal;
4442 if (likely(isless(ua.h, ub.h))) {
4443 return float_relation_less;
4446 * The only condition remaining is unordered.
4447 * Fall through to set flags.
4449 soft:
4450 return float64_do_compare(ua.s, ub.s, s, is_quiet);
4453 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4455 return float64_hs_compare(a, b, s, false);
4458 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4460 return float64_hs_compare(a, b, s, true);
4463 static FloatRelation QEMU_FLATTEN
4464 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4466 FloatParts64 pa, pb;
4468 bfloat16_unpack_canonical(&pa, a, s);
4469 bfloat16_unpack_canonical(&pb, b, s);
4470 return parts_compare(&pa, &pb, s, is_quiet);
4473 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4475 return bfloat16_do_compare(a, b, s, false);
4478 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4480 return bfloat16_do_compare(a, b, s, true);
4483 static FloatRelation QEMU_FLATTEN
4484 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4486 FloatParts128 pa, pb;
4488 float128_unpack_canonical(&pa, a, s);
4489 float128_unpack_canonical(&pb, b, s);
4490 return parts_compare(&pa, &pb, s, is_quiet);
4493 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4495 return float128_do_compare(a, b, s, false);
4498 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4500 return float128_do_compare(a, b, s, true);
4503 static FloatRelation QEMU_FLATTEN
4504 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4506 FloatParts128 pa, pb;
4508 if (!floatx80_unpack_canonical(&pa, a, s) ||
4509 !floatx80_unpack_canonical(&pb, b, s)) {
4510 return float_relation_unordered;
4512 return parts_compare(&pa, &pb, s, is_quiet);
4515 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4517 return floatx80_do_compare(a, b, s, false);
4520 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4522 return floatx80_do_compare(a, b, s, true);
/*
 * Scale by 2**N
 */
4529 float16 float16_scalbn(float16 a, int n, float_status *status)
4531 FloatParts64 p;
4533 float16_unpack_canonical(&p, a, status);
4534 parts_scalbn(&p, n, status);
4535 return float16_round_pack_canonical(&p, status);
4538 float32 float32_scalbn(float32 a, int n, float_status *status)
4540 FloatParts64 p;
4542 float32_unpack_canonical(&p, a, status);
4543 parts_scalbn(&p, n, status);
4544 return float32_round_pack_canonical(&p, status);
4547 float64 float64_scalbn(float64 a, int n, float_status *status)
4549 FloatParts64 p;
4551 float64_unpack_canonical(&p, a, status);
4552 parts_scalbn(&p, n, status);
4553 return float64_round_pack_canonical(&p, status);
4556 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4558 FloatParts64 p;
4560 bfloat16_unpack_canonical(&p, a, status);
4561 parts_scalbn(&p, n, status);
4562 return bfloat16_round_pack_canonical(&p, status);
4565 float128 float128_scalbn(float128 a, int n, float_status *status)
4567 FloatParts128 p;
4569 float128_unpack_canonical(&p, a, status);
4570 parts_scalbn(&p, n, status);
4571 return float128_round_pack_canonical(&p, status);
4574 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4576 FloatParts128 p;
4578 if (!floatx80_unpack_canonical(&p, a, status)) {
4579 return floatx80_default_nan(status);
4581 parts_scalbn(&p, n, status);
4582 return floatx80_round_pack_canonical(&p, status);
/*
 * Square Root
 */
4589 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4591 FloatParts64 p;
4593 float16_unpack_canonical(&p, a, status);
4594 parts_sqrt(&p, status, &float16_params);
4595 return float16_round_pack_canonical(&p, status);
4598 static float32 QEMU_SOFTFLOAT_ATTR
4599 soft_f32_sqrt(float32 a, float_status *status)
4601 FloatParts64 p;
4603 float32_unpack_canonical(&p, a, status);
4604 parts_sqrt(&p, status, &float32_params);
4605 return float32_round_pack_canonical(&p, status);
4608 static float64 QEMU_SOFTFLOAT_ATTR
4609 soft_f64_sqrt(float64 a, float_status *status)
4611 FloatParts64 p;
4613 float64_unpack_canonical(&p, a, status);
4614 parts_sqrt(&p, status, &float64_params);
4615 return float64_round_pack_canonical(&p, status);
4618 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4620 union_float32 ua, ur;
4622 ua.s = xa;
4623 if (unlikely(!can_use_fpu(s))) {
4624 goto soft;
4627 float32_input_flush1(&ua.s, s);
4628 if (QEMU_HARDFLOAT_1F32_USE_FP) {
4629 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4630 fpclassify(ua.h) == FP_ZERO) ||
4631 signbit(ua.h))) {
4632 goto soft;
4634 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4635 float32_is_neg(ua.s))) {
4636 goto soft;
4638 ur.h = sqrtf(ua.h);
4639 return ur.s;
4641 soft:
4642 return soft_f32_sqrt(ua.s, s);
4645 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4647 union_float64 ua, ur;
4649 ua.s = xa;
4650 if (unlikely(!can_use_fpu(s))) {
4651 goto soft;
4654 float64_input_flush1(&ua.s, s);
4655 if (QEMU_HARDFLOAT_1F64_USE_FP) {
4656 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4657 fpclassify(ua.h) == FP_ZERO) ||
4658 signbit(ua.h))) {
4659 goto soft;
4661 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4662 float64_is_neg(ua.s))) {
4663 goto soft;
4665 ur.h = sqrt(ua.h);
4666 return ur.s;
4668 soft:
4669 return soft_f64_sqrt(ua.s, s);
4672 float64 float64r32_sqrt(float64 a, float_status *status)
4674 FloatParts64 p;
4676 float64_unpack_canonical(&p, a, status);
4677 parts_sqrt(&p, status, &float64_params);
4678 return float64r32_round_pack_canonical(&p, status);
4681 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4683 FloatParts64 p;
4685 bfloat16_unpack_canonical(&p, a, status);
4686 parts_sqrt(&p, status, &bfloat16_params);
4687 return bfloat16_round_pack_canonical(&p, status);
4690 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4692 FloatParts128 p;
4694 float128_unpack_canonical(&p, a, status);
4695 parts_sqrt(&p, status, &float128_params);
4696 return float128_round_pack_canonical(&p, status);
4699 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4701 FloatParts128 p;
4703 if (!floatx80_unpack_canonical(&p, a, s)) {
4704 return floatx80_default_nan(s);
4706 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4707 return floatx80_round_pack_canonical(&p, s);
/*
 * log2
 */
4713 float32 float32_log2(float32 a, float_status *status)
4715 FloatParts64 p;
4717 float32_unpack_canonical(&p, a, status);
4718 parts_log2(&p, status, &float32_params);
4719 return float32_round_pack_canonical(&p, status);
4722 float64 float64_log2(float64 a, float_status *status)
4724 FloatParts64 p;
4726 float64_unpack_canonical(&p, a, status);
4727 parts_log2(&p, status, &float64_params);
4728 return float64_round_pack_canonical(&p, status);
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/
4735 float16 float16_default_nan(float_status *status)
4737 FloatParts64 p;
4739 parts_default_nan(&p, status);
4740 p.frac >>= float16_params.frac_shift;
4741 return float16_pack_raw(&p);
4744 float32 float32_default_nan(float_status *status)
4746 FloatParts64 p;
4748 parts_default_nan(&p, status);
4749 p.frac >>= float32_params.frac_shift;
4750 return float32_pack_raw(&p);
4753 float64 float64_default_nan(float_status *status)
4755 FloatParts64 p;
4757 parts_default_nan(&p, status);
4758 p.frac >>= float64_params.frac_shift;
4759 return float64_pack_raw(&p);
4762 float128 float128_default_nan(float_status *status)
4764 FloatParts128 p;
4766 parts_default_nan(&p, status);
4767 frac_shr(&p, float128_params.frac_shift);
4768 return float128_pack_raw(&p);
4771 bfloat16 bfloat16_default_nan(float_status *status)
4773 FloatParts64 p;
4775 parts_default_nan(&p, status);
4776 p.frac >>= bfloat16_params.frac_shift;
4777 return bfloat16_pack_raw(&p);
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/
4784 float16 float16_silence_nan(float16 a, float_status *status)
4786 FloatParts64 p;
4788 float16_unpack_raw(&p, a);
4789 p.frac <<= float16_params.frac_shift;
4790 parts_silence_nan(&p, status);
4791 p.frac >>= float16_params.frac_shift;
4792 return float16_pack_raw(&p);
4795 float32 float32_silence_nan(float32 a, float_status *status)
4797 FloatParts64 p;
4799 float32_unpack_raw(&p, a);
4800 p.frac <<= float32_params.frac_shift;
4801 parts_silence_nan(&p, status);
4802 p.frac >>= float32_params.frac_shift;
4803 return float32_pack_raw(&p);
4806 float64 float64_silence_nan(float64 a, float_status *status)
4808 FloatParts64 p;
4810 float64_unpack_raw(&p, a);
4811 p.frac <<= float64_params.frac_shift;
4812 parts_silence_nan(&p, status);
4813 p.frac >>= float64_params.frac_shift;
4814 return float64_pack_raw(&p);
4817 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4819 FloatParts64 p;
4821 bfloat16_unpack_raw(&p, a);
4822 p.frac <<= bfloat16_params.frac_shift;
4823 parts_silence_nan(&p, status);
4824 p.frac >>= bfloat16_params.frac_shift;
4825 return bfloat16_pack_raw(&p);
4828 float128 float128_silence_nan(float128 a, float_status *status)
4830 FloatParts128 p;
4832 float128_unpack_raw(&p, a);
4833 frac_shl(&p, float128_params.frac_shift);
4834 parts_silence_nan(&p, status);
4835 frac_shr(&p, float128_params.frac_shift);
4836 return float128_pack_raw(&p);
4839 /*----------------------------------------------------------------------------
4840 | If `a' is denormal and we are in flush-to-zero mode then set the
4841 | input-denormal exception and return zero. Otherwise just return the value.
4842 *----------------------------------------------------------------------------*/
4844 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4846 if (p.exp == 0 && p.frac != 0) {
4847 float_raise(float_flag_input_denormal, status);
4848 return true;
4851 return false;
4854 float16 float16_squash_input_denormal(float16 a, float_status *status)
4856 if (status->flush_inputs_to_zero) {
4857 FloatParts64 p;
4859 float16_unpack_raw(&p, a);
4860 if (parts_squash_denormal(p, status)) {
4861 return float16_set_sign(float16_zero, p.sign);
4864 return a;
4867 float32 float32_squash_input_denormal(float32 a, float_status *status)
4869 if (status->flush_inputs_to_zero) {
4870 FloatParts64 p;
4872 float32_unpack_raw(&p, a);
4873 if (parts_squash_denormal(p, status)) {
4874 return float32_set_sign(float32_zero, p.sign);
4877 return a;
4880 float64 float64_squash_input_denormal(float64 a, float_status *status)
4882 if (status->flush_inputs_to_zero) {
4883 FloatParts64 p;
4885 float64_unpack_raw(&p, a);
4886 if (parts_squash_denormal(p, status)) {
4887 return float64_set_sign(float64_zero, p.sign);
4890 return a;
4893 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4895 if (status->flush_inputs_to_zero) {
4896 FloatParts64 p;
4898 bfloat16_unpack_raw(&p, a);
4899 if (parts_squash_denormal(p, status)) {
4900 return bfloat16_set_sign(bfloat16_zero, p.sign);
4903 return a;
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The significand is
| shifted left by its number of leading zero bits; the shifted significand is
| stored through `zSigPtr' and the matching exponent (one minus the shift
| count) through `zExpPtr'.
| NOTE(review): the shift count comes straight from clz64(aSig); callers are
| presumably expected to pass a non-zero significand -- confirm.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t leadingZeros = clz64(aSig);

    *zSigPtr = aSig << leadingZeros;
    *zExpPtr = 1 - leadingZeros;
}
4923 /*----------------------------------------------------------------------------
4924 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4925 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4926 | and returns the proper extended double-precision floating-point value
4927 | corresponding to the abstract input. Ordinarily, the abstract value is
4928 | rounded and packed into the extended double-precision format, with the
4929 | inexact exception raised if the abstract input cannot be represented
4930 | exactly. However, if the abstract value is too large, the overflow and
4931 | inexact exceptions are raised and an infinity or maximal finite value is
4932 | returned. If the abstract value is too small, the input value is rounded to
4933 | a subnormal number, and the underflow and inexact exceptions are raised if
4934 | the abstract input cannot be represented exactly as a subnormal extended
4935 | double-precision floating-point number.
4936 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4937 | the result is rounded to the same number of bits as single or double
4938 | precision, respectively. Otherwise, the result is rounded to the full
4939 | precision of the extended double-precision format.
4940 | The input significand must be normalized or smaller. If the input
4941 | significand is not normalized, `zExp' must be 0; in that case, the result
4942 | returned is a subnormal number, and it must not require rounding. The
4943 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4944 | Floating-Point Arithmetic.
4945 *----------------------------------------------------------------------------*/
4947 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4948 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4949 float_status *status)
4951 FloatRoundMode roundingMode;
4952 bool roundNearestEven, increment, isTiny;
4953 int64_t roundIncrement, roundMask, roundBits;
4955 roundingMode = status->float_rounding_mode;
4956 roundNearestEven = ( roundingMode == float_round_nearest_even );
4957 switch (roundingPrecision) {
4958 case floatx80_precision_x:
4959 goto precision80;
4960 case floatx80_precision_d:
4961 roundIncrement = UINT64_C(0x0000000000000400);
4962 roundMask = UINT64_C(0x00000000000007FF);
4963 break;
4964 case floatx80_precision_s:
4965 roundIncrement = UINT64_C(0x0000008000000000);
4966 roundMask = UINT64_C(0x000000FFFFFFFFFF);
4967 break;
4968 default:
4969 g_assert_not_reached();
4971 zSig0 |= ( zSig1 != 0 );
4972 switch (roundingMode) {
4973 case float_round_nearest_even:
4974 case float_round_ties_away:
4975 break;
4976 case float_round_to_zero:
4977 roundIncrement = 0;
4978 break;
4979 case float_round_up:
4980 roundIncrement = zSign ? 0 : roundMask;
4981 break;
4982 case float_round_down:
4983 roundIncrement = zSign ? roundMask : 0;
4984 break;
4985 default:
4986 abort();
4988 roundBits = zSig0 & roundMask;
4989 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4990 if ( ( 0x7FFE < zExp )
4991 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4993 goto overflow;
4995 if ( zExp <= 0 ) {
4996 if (status->flush_to_zero) {
4997 float_raise(float_flag_output_denormal, status);
4998 return packFloatx80(zSign, 0, 0);
5000 isTiny = status->tininess_before_rounding
5001 || (zExp < 0 )
5002 || (zSig0 <= zSig0 + roundIncrement);
5003 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5004 zExp = 0;
5005 roundBits = zSig0 & roundMask;
5006 if (isTiny && roundBits) {
5007 float_raise(float_flag_underflow, status);
5009 if (roundBits) {
5010 float_raise(float_flag_inexact, status);
5012 zSig0 += roundIncrement;
5013 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5014 roundIncrement = roundMask + 1;
5015 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5016 roundMask |= roundIncrement;
5018 zSig0 &= ~ roundMask;
5019 return packFloatx80( zSign, zExp, zSig0 );
5022 if (roundBits) {
5023 float_raise(float_flag_inexact, status);
5025 zSig0 += roundIncrement;
5026 if ( zSig0 < roundIncrement ) {
5027 ++zExp;
5028 zSig0 = UINT64_C(0x8000000000000000);
5030 roundIncrement = roundMask + 1;
5031 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5032 roundMask |= roundIncrement;
5034 zSig0 &= ~ roundMask;
5035 if ( zSig0 == 0 ) zExp = 0;
5036 return packFloatx80( zSign, zExp, zSig0 );
5037 precision80:
5038 switch (roundingMode) {
5039 case float_round_nearest_even:
5040 case float_round_ties_away:
5041 increment = ((int64_t)zSig1 < 0);
5042 break;
5043 case float_round_to_zero:
5044 increment = 0;
5045 break;
5046 case float_round_up:
5047 increment = !zSign && zSig1;
5048 break;
5049 case float_round_down:
5050 increment = zSign && zSig1;
5051 break;
5052 default:
5053 abort();
5055 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5056 if ( ( 0x7FFE < zExp )
5057 || ( ( zExp == 0x7FFE )
5058 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5059 && increment
5062 roundMask = 0;
5063 overflow:
5064 float_raise(float_flag_overflow | float_flag_inexact, status);
5065 if ( ( roundingMode == float_round_to_zero )
5066 || ( zSign && ( roundingMode == float_round_up ) )
5067 || ( ! zSign && ( roundingMode == float_round_down ) )
5069 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5071 return packFloatx80(zSign,
5072 floatx80_infinity_high,
5073 floatx80_infinity_low);
5075 if ( zExp <= 0 ) {
5076 isTiny = status->tininess_before_rounding
5077 || (zExp < 0)
5078 || !increment
5079 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5080 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5081 zExp = 0;
5082 if (isTiny && zSig1) {
5083 float_raise(float_flag_underflow, status);
5085 if (zSig1) {
5086 float_raise(float_flag_inexact, status);
5088 switch (roundingMode) {
5089 case float_round_nearest_even:
5090 case float_round_ties_away:
5091 increment = ((int64_t)zSig1 < 0);
5092 break;
5093 case float_round_to_zero:
5094 increment = 0;
5095 break;
5096 case float_round_up:
5097 increment = !zSign && zSig1;
5098 break;
5099 case float_round_down:
5100 increment = zSign && zSig1;
5101 break;
5102 default:
5103 abort();
5105 if ( increment ) {
5106 ++zSig0;
5107 if (!(zSig1 << 1) && roundNearestEven) {
5108 zSig0 &= ~1;
5110 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5112 return packFloatx80( zSign, zExp, zSig0 );
5115 if (zSig1) {
5116 float_raise(float_flag_inexact, status);
5118 if ( increment ) {
5119 ++zSig0;
5120 if ( zSig0 == 0 ) {
5121 ++zExp;
5122 zSig0 = UINT64_C(0x8000000000000000);
5124 else {
5125 if (!(zSig1 << 1) && roundNearestEven) {
5126 zSig0 &= ~1;
5130 else {
5131 if ( zSig0 == 0 ) zExp = 0;
5133 return packFloatx80( zSign, zExp, zSig0 );
5137 /*----------------------------------------------------------------------------
5138 | Takes an abstract floating-point value having sign `zSign', exponent
5139 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5140 | and returns the proper extended double-precision floating-point value
5141 | corresponding to the abstract input. This routine is just like
5142 | `roundAndPackFloatx80' except that the input significand does not have to be
5143 | normalized.
5144 *----------------------------------------------------------------------------*/
5146 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5147 bool zSign, int32_t zExp,
5148 uint64_t zSig0, uint64_t zSig1,
5149 float_status *status)
5151 int8_t shiftCount;
5153 if ( zSig0 == 0 ) {
5154 zSig0 = zSig1;
5155 zSig1 = 0;
5156 zExp -= 64;
5158 shiftCount = clz64(zSig0);
5159 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5160 zExp -= shiftCount;
5161 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5162 zSig0, zSig1, status);
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/
5184 static const float64 float32_exp2_coefficients[15] =
5186 const_float64( 0x3ff0000000000000ll ), /* 1 */
5187 const_float64( 0x3fe0000000000000ll ), /* 2 */
5188 const_float64( 0x3fc5555555555555ll ), /* 3 */
5189 const_float64( 0x3fa5555555555555ll ), /* 4 */
5190 const_float64( 0x3f81111111111111ll ), /* 5 */
5191 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5192 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5193 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5194 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5195 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5196 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5197 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5198 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5199 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5200 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5203 float32 float32_exp2(float32 a, float_status *status)
5205 FloatParts64 xp, xnp, tp, rp;
5206 int i;
5208 float32_unpack_canonical(&xp, a, status);
5209 if (unlikely(xp.cls != float_class_normal)) {
5210 switch (xp.cls) {
5211 case float_class_snan:
5212 case float_class_qnan:
5213 parts_return_nan(&xp, status);
5214 return float32_round_pack_canonical(&xp, status);
5215 case float_class_inf:
5216 return xp.sign ? float32_zero : a;
5217 case float_class_zero:
5218 return float32_one;
5219 default:
5220 break;
5222 g_assert_not_reached();
5225 float_raise(float_flag_inexact, status);
5227 float64_unpack_canonical(&tp, float64_ln2, status);
5228 xp = *parts_mul(&xp, &tp, status);
5229 xnp = xp;
5231 float64_unpack_canonical(&rp, float64_one, status);
5232 for (i = 0 ; i < 15 ; i++) {
5233 float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5234 rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
5235 xnp = *parts_mul(&xnp, &xp, status);
5238 return float32_round_pack_canonical(&rp, status);
5241 /*----------------------------------------------------------------------------
5242 | Rounds the extended double-precision floating-point value `a'
5243 | to the precision provided by floatx80_rounding_precision and returns the
5244 | result as an extended double-precision floating-point value.
5245 | The operation is performed according to the IEC/IEEE Standard for Binary
5246 | Floating-Point Arithmetic.
5247 *----------------------------------------------------------------------------*/
5249 floatx80 floatx80_round(floatx80 a, float_status *status)
5251 FloatParts128 p;
5253 if (!floatx80_unpack_canonical(&p, a, status)) {
5254 return floatx80_default_nan(status);
5256 return floatx80_round_pack_canonical(&p, status);
5259 static void __attribute__((constructor)) softfloat_init(void)
5261 union_float64 ua, ub, uc, ur;
5263 if (QEMU_NO_HARDFLOAT) {
5264 return;
5267 * Test that the host's FMA is not obviously broken. For example,
5268 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5269 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5271 ua.s = 0x0020000000000001ULL;
5272 ub.s = 0x3ca0000000000000ULL;
5273 uc.s = 0x0020000000000000ULL;
5274 ur.h = fma(ua.h, ub.h, uc.h);
5275 if (ur.s != 0x0020000000000001ULL) {
5276 force_soft_fma = true;