1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* Various predicates and operations on IEEE-754 floating point types. */
9 #ifndef mozilla_FloatingPoint_h
10 #define mozilla_FloatingPoint_h
12 #include "mozilla/Assertions.h"
13 #include "mozilla/Attributes.h"
14 #include "mozilla/Casting.h"
15 #include "mozilla/MathAlgorithms.h"
16 #include "mozilla/MemoryChecking.h"
17 #include "mozilla/Types.h"
18 #include "mozilla/TypeTraits.h"
/*
 * It's reasonable to ask why we have this header at all.  Don't isnan,
 * copysign, the built-in comparison operators, and the like solve these
 * problems?  Unfortunately, they don't.  We've found that various compilers
 * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile
 * the standard methods in various situations, so we can't use them.  Some of
 * these compilers even have problems compiling seemingly reasonable bitwise
 * algorithms!  But with some care we've found algorithms that seem to not
 * trigger those compiler bugs.
 *
 * For the aforementioned reasons, be very wary of making changes to any of
 * these algorithms.  If you must make changes, keep a careful eye out for
 * compiler bustage, particularly PGO-specific bustage.
 */
namespace detail {

/*
 * These implementations assume float/double are 32/64-bit single/double
 * format number types compatible with the IEEE-754 standard.  C++ doesn't
 * require this, but we required it in implementations of these algorithms that
 * preceded this header, so we shouldn't break anything to continue doing so.
 *
 * FloatingPointTrait<T> supplies the raw encoding parameters of T: an unsigned
 * integer type |Bits| used to hold T's bit pattern, and the widths (in bits)
 * of the exponent and significand fields.  Only the float and double
 * specializations are defined; the primary template is left incomplete so any
 * other T fails to compile.
 */
template <typename T>
struct FloatingPointTrait;

/* Encoding parameters for single precision (1 + 8 + 23 = 32 bits). */
template <>
struct FloatingPointTrait<float> {
 protected:
  using Bits = uint32_t;

  static constexpr unsigned kExponentWidth = 8;
  static constexpr unsigned kSignificandWidth = 23;
};

/* Encoding parameters for double precision (1 + 11 + 52 = 64 bits). */
template <>
struct FloatingPointTrait<double> {
 protected:
  using Bits = uint64_t;

  static constexpr unsigned kExponentWidth = 11;
  static constexpr unsigned kSignificandWidth = 52;
};

}  // namespace detail
72 * This struct contains details regarding the encoding of floating-point
73 * numbers that can be useful for direct bit manipulation. As of now, the
74 * template parameter has to be float or double.
76 * The nested typedef |Bits| is the unsigned integral type with the same size
77 * as T: uint32_t for float and uint64_t for double (static assertions
78 * double-check these assumptions).
80 * kExponentBias is the offset that is subtracted from the exponent when
81 * computing the value, i.e. one plus the opposite of the mininum possible
83 * kExponentShift is the shift that one needs to apply to retrieve the
84 * exponent component of the value.
86 * kSignBit contains a bits mask. Bit-and-ing with this mask will result in
87 * obtaining the sign bit.
88 * kExponentBits contains the mask needed for obtaining the exponent bits and
89 * kSignificandBits contains the mask needed for obtaining the significand
92 * Full details of how floating point number formats are encoded are beyond
93 * the scope of this comment. For more information, see
94 * http://en.wikipedia.org/wiki/IEEE_floating_point
95 * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers
98 struct FloatingPoint final
: private detail::FloatingPointTrait
<T
> {
100 using Base
= detail::FloatingPointTrait
<T
>;
104 * An unsigned integral type suitable for accessing the bitwise representation
107 using Bits
= typename
Base::Bits
;
109 static_assert(sizeof(T
) == sizeof(Bits
), "Bits must be same size as T");
111 /** The bit-width of the exponent component of T. */
112 using Base::kExponentWidth
;
114 /** The bit-width of the significand component of T. */
115 using Base::kSignificandWidth
;
117 static_assert(1 + kExponentWidth
+ kSignificandWidth
== CHAR_BIT
* sizeof(T
),
118 "sign bit plus bit widths should sum to overall bit width");
121 * The exponent field in an IEEE-754 floating point number consists of bits
122 * encoding an unsigned number. The *actual* represented exponent (for all
123 * values finite and not denormal) is that value, minus a bias |kExponentBias|
124 * so that a useful range of numbers is represented.
126 static constexpr unsigned kExponentBias
= (1U << (kExponentWidth
- 1)) - 1;
129 * The amount by which the bits of the exponent-field in an IEEE-754 floating
130 * point number are shifted from the LSB of the floating point type.
132 static constexpr unsigned kExponentShift
= kSignificandWidth
;
134 /** The sign bit in the floating point representation. */
135 static constexpr Bits kSignBit
= static_cast<Bits
>(1)
136 << (CHAR_BIT
* sizeof(Bits
) - 1);
138 /** The exponent bits in the floating point representation. */
139 static constexpr Bits kExponentBits
=
140 ((static_cast<Bits
>(1) << kExponentWidth
) - 1) << kSignificandWidth
;
142 /** The significand bits in the floating point representation. */
143 static constexpr Bits kSignificandBits
=
144 (static_cast<Bits
>(1) << kSignificandWidth
) - 1;
146 static_assert((kSignBit
& kExponentBits
) == 0,
147 "sign bit shouldn't overlap exponent bits");
148 static_assert((kSignBit
& kSignificandBits
) == 0,
149 "sign bit shouldn't overlap significand bits");
150 static_assert((kExponentBits
& kSignificandBits
) == 0,
151 "exponent bits shouldn't overlap significand bits");
153 static_assert((kSignBit
| kExponentBits
| kSignificandBits
) == ~Bits(0),
154 "all bits accounted for");
157 /** Determines whether a float/double is NaN. */
158 template <typename T
>
159 static MOZ_ALWAYS_INLINE
bool IsNaN(T aValue
) {
161 * A float/double is NaN if all exponent bits are 1 and the significand
162 * contains at least one non-zero bit.
164 typedef FloatingPoint
<T
> Traits
;
165 typedef typename
Traits::Bits Bits
;
166 return (BitwiseCast
<Bits
>(aValue
) & Traits::kExponentBits
) ==
167 Traits::kExponentBits
&&
168 (BitwiseCast
<Bits
>(aValue
) & Traits::kSignificandBits
) != 0;
171 /** Determines whether a float/double is +Infinity or -Infinity. */
172 template <typename T
>
173 static MOZ_ALWAYS_INLINE
bool IsInfinite(T aValue
) {
174 /* Infinities have all exponent bits set to 1 and an all-0 significand. */
175 typedef FloatingPoint
<T
> Traits
;
176 typedef typename
Traits::Bits Bits
;
177 Bits bits
= BitwiseCast
<Bits
>(aValue
);
178 return (bits
& ~Traits::kSignBit
) == Traits::kExponentBits
;
181 /** Determines whether a float/double is not NaN or infinite. */
182 template <typename T
>
183 static MOZ_ALWAYS_INLINE
bool IsFinite(T aValue
) {
185 * NaN and Infinities are the only non-finite floats/doubles, and both have
186 * all exponent bits set to 1.
188 typedef FloatingPoint
<T
> Traits
;
189 typedef typename
Traits::Bits Bits
;
190 Bits bits
= BitwiseCast
<Bits
>(aValue
);
191 return (bits
& Traits::kExponentBits
) != Traits::kExponentBits
;
195 * Determines whether a float/double is negative or -0. It is an error
196 * to call this method on a float/double which is NaN.
198 template <typename T
>
199 static MOZ_ALWAYS_INLINE
bool IsNegative(T aValue
) {
200 MOZ_ASSERT(!IsNaN(aValue
), "NaN does not have a sign");
202 /* The sign bit is set if the double is negative. */
203 typedef FloatingPoint
<T
> Traits
;
204 typedef typename
Traits::Bits Bits
;
205 Bits bits
= BitwiseCast
<Bits
>(aValue
);
206 return (bits
& Traits::kSignBit
) != 0;
209 /** Determines whether a float/double represents -0. */
210 template <typename T
>
211 static MOZ_ALWAYS_INLINE
bool IsNegativeZero(T aValue
) {
212 /* Only the sign bit is set if the value is -0. */
213 typedef FloatingPoint
<T
> Traits
;
214 typedef typename
Traits::Bits Bits
;
215 Bits bits
= BitwiseCast
<Bits
>(aValue
);
216 return bits
== Traits::kSignBit
;
219 /** Determines wether a float/double represents +0. */
220 template <typename T
>
221 static MOZ_ALWAYS_INLINE
bool IsPositiveZero(T aValue
) {
222 /* All bits are zero if the value is +0. */
223 typedef FloatingPoint
<T
> Traits
;
224 typedef typename
Traits::Bits Bits
;
225 Bits bits
= BitwiseCast
<Bits
>(aValue
);
230 * Returns 0 if a float/double is NaN or infinite;
231 * otherwise, the float/double is returned.
233 template <typename T
>
234 static MOZ_ALWAYS_INLINE T
ToZeroIfNonfinite(T aValue
) {
235 return IsFinite(aValue
) ? aValue
: 0;
239 * Returns the exponent portion of the float/double.
241 * Zero is not special-cased, so ExponentComponent(0.0) is
242 * -int_fast16_t(Traits::kExponentBias).
244 template <typename T
>
245 static MOZ_ALWAYS_INLINE
int_fast16_t ExponentComponent(T aValue
) {
247 * The exponent component of a float/double is an unsigned number, biased
248 * from its actual value. Subtract the bias to retrieve the actual exponent.
250 typedef FloatingPoint
<T
> Traits
;
251 typedef typename
Traits::Bits Bits
;
252 Bits bits
= BitwiseCast
<Bits
>(aValue
);
253 return int_fast16_t((bits
& Traits::kExponentBits
) >>
254 Traits::kExponentShift
) -
255 int_fast16_t(Traits::kExponentBias
);
258 /** Returns +Infinity. */
259 template <typename T
>
260 static MOZ_ALWAYS_INLINE T
PositiveInfinity() {
262 * Positive infinity has all exponent bits set, sign bit set to 0, and no
265 typedef FloatingPoint
<T
> Traits
;
266 return BitwiseCast
<T
>(Traits::kExponentBits
);
269 /** Returns -Infinity. */
270 template <typename T
>
271 static MOZ_ALWAYS_INLINE T
NegativeInfinity() {
273 * Negative infinity has all exponent bits set, sign bit set to 1, and no
276 typedef FloatingPoint
<T
> Traits
;
277 return BitwiseCast
<T
>(Traits::kSignBit
| Traits::kExponentBits
);
281 * Computes the bit pattern for an infinity with the specified sign bit.
283 template <typename T
, int SignBit
>
284 struct InfinityBits
{
285 using Traits
= FloatingPoint
<T
>;
287 static_assert(SignBit
== 0 || SignBit
== 1, "bad sign bit");
288 static constexpr typename
Traits::Bits value
=
289 (SignBit
* Traits::kSignBit
) | Traits::kExponentBits
;
293 * Computes the bit pattern for a NaN with the specified sign bit and
296 template <typename T
, int SignBit
, typename FloatingPoint
<T
>::Bits Significand
>
297 struct SpecificNaNBits
{
298 using Traits
= FloatingPoint
<T
>;
300 static_assert(SignBit
== 0 || SignBit
== 1, "bad sign bit");
301 static_assert((Significand
& ~Traits::kSignificandBits
) == 0,
302 "significand must only have significand bits set");
303 static_assert(Significand
& Traits::kSignificandBits
,
304 "significand must be nonzero");
306 static constexpr typename
Traits::Bits value
=
307 (SignBit
* Traits::kSignBit
) | Traits::kExponentBits
| Significand
;
311 * Constructs a NaN value with the specified sign bit and significand bits.
313 * There is also a variant that returns the value directly. In most cases, the
314 * two variants should be identical. However, in the specific case of x86
315 * chips, the behavior differs: returning floating-point values directly is done
316 * through the x87 stack, and x87 loads and stores turn signaling NaNs into
317 * quiet NaNs... silently. Returning floating-point values via outparam,
318 * however, is done entirely within the SSE registers when SSE2 floating-point
319 * is enabled in the compiler, which has semantics-preserving behavior you would
322 * If preserving the distinction between signaling NaNs and quiet NaNs is
323 * important to you, you should use the outparam version. In all other cases,
324 * you should use the direct return version.
326 template <typename T
>
327 static MOZ_ALWAYS_INLINE
void SpecificNaN(
328 int signbit
, typename FloatingPoint
<T
>::Bits significand
, T
* result
) {
329 typedef FloatingPoint
<T
> Traits
;
330 MOZ_ASSERT(signbit
== 0 || signbit
== 1);
331 MOZ_ASSERT((significand
& ~Traits::kSignificandBits
) == 0);
332 MOZ_ASSERT(significand
& Traits::kSignificandBits
);
335 (signbit
? Traits::kSignBit
: 0) | Traits::kExponentBits
| significand
,
337 MOZ_ASSERT(IsNaN(*result
));
340 template <typename T
>
341 static MOZ_ALWAYS_INLINE T
342 SpecificNaN(int signbit
, typename FloatingPoint
<T
>::Bits significand
) {
344 SpecificNaN(signbit
, significand
, &t
);
348 /** Computes the smallest non-zero positive float/double value. */
349 template <typename T
>
350 static MOZ_ALWAYS_INLINE T
MinNumberValue() {
351 typedef FloatingPoint
<T
> Traits
;
352 typedef typename
Traits::Bits Bits
;
353 return BitwiseCast
<T
>(Bits(1));
358 template <typename Float
, typename SignedInteger
>
359 inline bool NumberEqualsSignedInteger(Float aValue
, SignedInteger
* aInteger
) {
360 static_assert(IsSame
<Float
, float>::value
|| IsSame
<Float
, double>::value
,
361 "Float must be an IEEE-754 floating point type");
362 static_assert(IsSigned
<SignedInteger
>::value
,
363 "this algorithm only works for signed types: a different one "
364 "will be required for unsigned types");
365 static_assert(sizeof(SignedInteger
) >= sizeof(int),
366 "this function *might* require some finessing for signed types "
367 "subject to integral promotion before it can be used on them");
369 MOZ_MAKE_MEM_UNDEFINED(aInteger
, sizeof(*aInteger
));
371 // NaNs and infinities are not integers.
372 if (!IsFinite(aValue
)) {
376 // Otherwise do direct comparisons against the minimum/maximum |SignedInteger|
377 // values that can be encoded in |Float|.
379 constexpr SignedInteger MaxIntValue
=
380 std::numeric_limits
<SignedInteger
>::max(); // e.g. INT32_MAX
381 constexpr SignedInteger MinValue
=
382 std::numeric_limits
<SignedInteger
>::min(); // e.g. INT32_MIN
384 static_assert(IsPowerOfTwo(Abs(MinValue
)),
385 "MinValue should be is a small power of two, thus exactly "
386 "representable in float/double both");
388 constexpr unsigned SignedIntegerWidth
= CHAR_BIT
* sizeof(SignedInteger
);
389 constexpr unsigned ExponentShift
= FloatingPoint
<Float
>::kExponentShift
;
391 // Careful! |MaxIntValue| may not be the maximum |SignedInteger| value that
392 // can be encoded in |Float|. Its |SignedIntegerWidth - 1| bits of precision
393 // may exceed |Float|'s |ExponentShift + 1| bits of precision. If necessary,
394 // compute the maximum |SignedInteger| that fits in |Float| from IEEE-754
395 // first principles. (|MinValue| doesn't have this problem because as a
396 // [relatively] small power of two it's always representable in |Float|.)
398 // Per C++11 [expr.const]p2, unevaluated subexpressions of logical AND/OR and
399 // conditional expressions *may* contain non-constant expressions, without
400 // making the enclosing expression not constexpr. MSVC implements this -- but
401 // it sometimes warns about undefined behavior in unevaluated subexpressions.
402 // This bites us if we initialize |MaxValue| the obvious way including an
403 // |uint64_t(1) << (SignedIntegerWidth - 2 - ExponentShift)| subexpression.
404 // Pull that shift-amount out and give it a not-too-huge value when it's in an
405 // unevaluated subexpression. 🙄
406 constexpr unsigned PrecisionExceededShiftAmount
=
407 ExponentShift
> SignedIntegerWidth
- 1
409 : SignedIntegerWidth
- 2 - ExponentShift
;
411 constexpr SignedInteger MaxValue
=
412 ExponentShift
> SignedIntegerWidth
- 1
414 : SignedInteger((uint64_t(1) << (SignedIntegerWidth
- 1)) -
415 (uint64_t(1) << PrecisionExceededShiftAmount
));
417 if (static_cast<Float
>(MinValue
) <= aValue
&&
418 aValue
<= static_cast<Float
>(MaxValue
)) {
419 auto possible
= static_cast<SignedInteger
>(aValue
);
420 if (static_cast<Float
>(possible
) == aValue
) {
421 *aInteger
= possible
;
429 template <typename Float
, typename SignedInteger
>
430 inline bool NumberIsSignedInteger(Float aValue
, SignedInteger
* aInteger
) {
431 static_assert(IsSame
<Float
, float>::value
|| IsSame
<Float
, double>::value
,
432 "Float must be an IEEE-754 floating point type");
433 static_assert(IsSigned
<SignedInteger
>::value
,
434 "this algorithm only works for signed types: a different one "
435 "will be required for unsigned types");
436 static_assert(sizeof(SignedInteger
) >= sizeof(int),
437 "this function *might* require some finessing for signed types "
438 "subject to integral promotion before it can be used on them");
440 MOZ_MAKE_MEM_UNDEFINED(aInteger
, sizeof(*aInteger
));
442 if (IsNegativeZero(aValue
)) {
446 return NumberEqualsSignedInteger(aValue
, aInteger
);
449 } // namespace detail
452 * If |aValue| is identical to some |int32_t| value, set |*aInt32| to that value
453 * and return true. Otherwise return false, leaving |*aInt32| in an
454 * indeterminate state.
456 * This method returns false for negative zero. If you want to consider -0 to
457 * be 0, use NumberEqualsInt32 below.
459 template <typename T
>
460 static MOZ_ALWAYS_INLINE
bool NumberIsInt32(T aValue
, int32_t* aInt32
) {
461 return detail::NumberIsSignedInteger(aValue
, aInt32
);
465 * If |aValue| is equal to some int32_t value (where -0 and +0 are considered
466 * equal), set |*aInt32| to that value and return true. Otherwise return false,
467 * leaving |*aInt32| in an indeterminate state.
469 * |NumberEqualsInt32(-0.0, ...)| will return true. To test whether a value can
470 * be losslessly converted to |int32_t| and back, use NumberIsInt32 above.
472 template <typename T
>
473 static MOZ_ALWAYS_INLINE
bool NumberEqualsInt32(T aValue
, int32_t* aInt32
) {
474 return detail::NumberEqualsSignedInteger(aValue
, aInt32
);
478 * Computes a NaN value. Do not use this method if you depend upon a particular
479 * NaN value being returned.
481 template <typename T
>
482 static MOZ_ALWAYS_INLINE T
UnspecifiedNaN() {
484 * If we can use any quiet NaN, we might as well use the all-ones NaN,
485 * since it's cheap to materialize on common platforms (such as x64, where
486 * this value can be represented in a 32-bit signed immediate field, allowing
487 * it to be stored to memory in a single instruction).
489 typedef FloatingPoint
<T
> Traits
;
490 return SpecificNaN
<T
>(1, Traits::kSignificandBits
);
494 * Compare two doubles for equality, *without* equating -0 to +0, and equating
495 * any NaN value to any other NaN value. (The normal equality operators equate
496 * -0 with +0, and they equate NaN to no other value.)
498 template <typename T
>
499 static inline bool NumbersAreIdentical(T aValue1
, T aValue2
) {
500 typedef FloatingPoint
<T
> Traits
;
501 typedef typename
Traits::Bits Bits
;
502 if (IsNaN(aValue1
)) {
503 return IsNaN(aValue2
);
505 return BitwiseCast
<Bits
>(aValue1
) == BitwiseCast
<Bits
>(aValue2
);
/**
 * Return true iff |aValue1| and |aValue2| are equal (ignoring sign if both are
 * zero) or both NaN.
 */
template <typename T>
static inline bool EqualOrBothNaN(T aValue1, T aValue2) {
  if (IsNaN(aValue1)) {
    return IsNaN(aValue2);
  }
  // operator== already treats -0 and +0 as equal and is false if aValue2 is NaN.
  return aValue1 == aValue2;
}
namespace detail {

/*
 * Supplies the default tolerance for fuzzy floating point comparison of T via
 * a static |value()| function.  Only the float and double specializations are
 * defined.
 */
template <typename T>
struct FuzzyEqualsEpsilon;

template <>
struct FuzzyEqualsEpsilon<float> {
  // A number near 1e-5 that is exactly representable in a float.
  static float value() { return 1.0f / (1 << 17); }
};

template <>
struct FuzzyEqualsEpsilon<double> {
  // A number near 1e-12 that is exactly representable in a double.
  static double value() { return 1.0 / (1LL << 40); }
};

}  // namespace detail
540 * Compare two floating point values for equality, modulo rounding error. That
541 * is, the two values are considered equal if they are both not NaN and if they
542 * are less than or equal to aEpsilon apart. The default value of aEpsilon is
545 * For most scenarios you will want to use FuzzyEqualsMultiplicative instead,
546 * as it is more reasonable over the entire range of floating point numbers.
547 * This additive version should only be used if you know the range of the
548 * numbers you are dealing with is bounded and stays around the same order of
551 template <typename T
>
552 static MOZ_ALWAYS_INLINE
bool FuzzyEqualsAdditive(
553 T aValue1
, T aValue2
, T aEpsilon
= detail::FuzzyEqualsEpsilon
<T
>::value()) {
554 static_assert(IsFloatingPoint
<T
>::value
, "floating point type required");
555 return Abs(aValue1
- aValue2
) <= aEpsilon
;
559 * Compare two floating point values for equality, allowing for rounding error
560 * relative to the magnitude of the values. That is, the two values are
561 * considered equal if they are both not NaN and they are less than or equal to
562 * some aEpsilon apart, where the aEpsilon is scaled by the smaller of the two
565 * In most cases you will want to use this rather than FuzzyEqualsAdditive, as
566 * this function effectively masks out differences in the bottom few bits of
567 * the floating point numbers being compared, regardless of what order of
568 * magnitude those numbers are at.
570 template <typename T
>
571 static MOZ_ALWAYS_INLINE
bool FuzzyEqualsMultiplicative(
572 T aValue1
, T aValue2
, T aEpsilon
= detail::FuzzyEqualsEpsilon
<T
>::value()) {
573 static_assert(IsFloatingPoint
<T
>::value
, "floating point type required");
574 // can't use std::min because of bug 965340
575 T smaller
= Abs(aValue1
) < Abs(aValue2
) ? Abs(aValue1
) : Abs(aValue2
);
576 return Abs(aValue1
- aValue2
) <= aEpsilon
* smaller
;
580 * Returns true if |aValue| can be losslessly represented as an IEEE-754 single
581 * precision number, false otherwise. All NaN values are considered
582 * representable (even though the bit patterns of double precision NaNs can't
583 * all be exactly represented in single precision).
586 extern MFBT_API
bool IsFloat32Representable(double aValue
);
588 } /* namespace mozilla */
590 #endif /* mozilla_FloatingPoint_h */