no bug - Bumping Firefox l10n changesets r=release a=l10n-bump DONTBUILD CLOSED TREE
[gecko.git] / mfbt / FloatingPoint.h
blobf4ae36257bba2b6495e7f309ca50b990b1d199ee
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* Various predicates and operations on IEEE-754 floating point types. */
9 #ifndef mozilla_FloatingPoint_h
10 #define mozilla_FloatingPoint_h
12 #include "mozilla/Assertions.h"
13 #include "mozilla/Attributes.h"
14 #include "mozilla/Casting.h"
15 #include "mozilla/MathAlgorithms.h"
16 #include "mozilla/MemoryChecking.h"
17 #include "mozilla/Types.h"
19 #include <algorithm>
20 #include <climits>
21 #include <limits>
22 #include <stdint.h>
24 namespace mozilla {
/*
 * It's reasonable to ask why we have this header at all.  Don't isnan,
 * copysign, the built-in comparison operators, and the like solve these
 * problems?  Unfortunately, they don't.  We've found that various compilers
 * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile
 * the standard methods in various situations, so we can't use them.  Some of
 * these compilers even have problems compiling seemingly reasonable bitwise
 * algorithms!  But with some care we've found algorithms that seem to not
 * trigger those compiler bugs.
 *
 * For the aforementioned reasons, be very wary of making changes to any of
 * these algorithms.  If you must make changes, keep a careful eye out for
 * compiler bustage, particularly PGO-specific bustage.
 */
namespace detail {

/*
 * The traits below hard-code float and double as the 32-bit single and 64-bit
 * double number formats of IEEE-754.  C++ itself makes no such promise, but
 * the implementations of these algorithms that preceded this header already
 * required it, so continuing to require it here shouldn't break anything.
 */
template <typename T>
struct FloatingPointTrait;

/** Field widths and bit-carrier type for float (32-bit single format). */
template <>
struct FloatingPointTrait<float> {
 protected:
  static constexpr unsigned kSignificandWidth = 23;
  static constexpr unsigned kExponentWidth = 8;

  using Bits = uint32_t;
};

/** Field widths and bit-carrier type for double (64-bit double format). */
template <>
struct FloatingPointTrait<double> {
 protected:
  static constexpr unsigned kSignificandWidth = 52;
  static constexpr unsigned kExponentWidth = 11;

  using Bits = uint64_t;
};

}  // namespace detail
73 * This struct contains details regarding the encoding of floating-point
74 * numbers that can be useful for direct bit manipulation. As of now, the
75 * template parameter has to be float or double.
77 * The nested typedef |Bits| is the unsigned integral type with the same size
78 * as T: uint32_t for float and uint64_t for double (static assertions
79 * double-check these assumptions).
81 * kExponentBias is the offset that is subtracted from the exponent when
82 * computing the value, i.e. one plus the opposite of the mininum possible
83 * exponent.
84 * kExponentShift is the shift that one needs to apply to retrieve the
85 * exponent component of the value.
87 * kSignBit contains a bits mask. Bit-and-ing with this mask will result in
88 * obtaining the sign bit.
89 * kExponentBits contains the mask needed for obtaining the exponent bits and
90 * kSignificandBits contains the mask needed for obtaining the significand
91 * bits.
93 * Full details of how floating point number formats are encoded are beyond
94 * the scope of this comment. For more information, see
95 * http://en.wikipedia.org/wiki/IEEE_floating_point
96 * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers
98 template <typename T>
99 struct FloatingPoint final : private detail::FloatingPointTrait<T> {
100 private:
101 using Base = detail::FloatingPointTrait<T>;
103 public:
105 * An unsigned integral type suitable for accessing the bitwise representation
106 * of T.
108 using Bits = typename Base::Bits;
110 static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T");
112 /** The bit-width of the exponent component of T. */
113 using Base::kExponentWidth;
115 /** The bit-width of the significand component of T. */
116 using Base::kSignificandWidth;
118 static_assert(1 + kExponentWidth + kSignificandWidth == CHAR_BIT * sizeof(T),
119 "sign bit plus bit widths should sum to overall bit width");
122 * The exponent field in an IEEE-754 floating point number consists of bits
123 * encoding an unsigned number. The *actual* represented exponent (for all
124 * values finite and not denormal) is that value, minus a bias |kExponentBias|
125 * so that a useful range of numbers is represented.
127 static constexpr unsigned kExponentBias = (1U << (kExponentWidth - 1)) - 1;
130 * The amount by which the bits of the exponent-field in an IEEE-754 floating
131 * point number are shifted from the LSB of the floating point type.
133 static constexpr unsigned kExponentShift = kSignificandWidth;
135 /** The sign bit in the floating point representation. */
136 static constexpr Bits kSignBit = static_cast<Bits>(1)
137 << (CHAR_BIT * sizeof(Bits) - 1);
139 /** The exponent bits in the floating point representation. */
140 static constexpr Bits kExponentBits =
141 ((static_cast<Bits>(1) << kExponentWidth) - 1) << kSignificandWidth;
143 /** The significand bits in the floating point representation. */
144 static constexpr Bits kSignificandBits =
145 (static_cast<Bits>(1) << kSignificandWidth) - 1;
147 static_assert((kSignBit & kExponentBits) == 0,
148 "sign bit shouldn't overlap exponent bits");
149 static_assert((kSignBit & kSignificandBits) == 0,
150 "sign bit shouldn't overlap significand bits");
151 static_assert((kExponentBits & kSignificandBits) == 0,
152 "exponent bits shouldn't overlap significand bits");
154 static_assert((kSignBit | kExponentBits | kSignificandBits) == ~Bits(0),
155 "all bits accounted for");
159 * Determines whether a float/double is negative or -0. It is an error
160 * to call this method on a float/double which is NaN.
162 template <typename T>
163 static MOZ_ALWAYS_INLINE bool IsNegative(T aValue) {
164 MOZ_ASSERT(!std::isnan(aValue), "NaN does not have a sign");
165 return std::signbit(aValue);
168 /** Determines whether a float/double represents -0. */
169 template <typename T>
170 static MOZ_ALWAYS_INLINE bool IsNegativeZero(T aValue) {
171 /* Only the sign bit is set if the value is -0. */
172 typedef FloatingPoint<T> Traits;
173 typedef typename Traits::Bits Bits;
174 Bits bits = BitwiseCast<Bits>(aValue);
175 return bits == Traits::kSignBit;
178 /** Determines wether a float/double represents +0. */
179 template <typename T>
180 static MOZ_ALWAYS_INLINE bool IsPositiveZero(T aValue) {
181 /* All bits are zero if the value is +0. */
182 typedef FloatingPoint<T> Traits;
183 typedef typename Traits::Bits Bits;
184 Bits bits = BitwiseCast<Bits>(aValue);
185 return bits == 0;
189 * Returns 0 if a float/double is NaN or infinite;
190 * otherwise, the float/double is returned.
192 template <typename T>
193 static MOZ_ALWAYS_INLINE T ToZeroIfNonfinite(T aValue) {
194 return std::isfinite(aValue) ? aValue : 0;
198 * Returns the exponent portion of the float/double.
200 * Zero is not special-cased, so ExponentComponent(0.0) is
201 * -int_fast16_t(Traits::kExponentBias).
203 template <typename T>
204 static MOZ_ALWAYS_INLINE int_fast16_t ExponentComponent(T aValue) {
206 * The exponent component of a float/double is an unsigned number, biased
207 * from its actual value. Subtract the bias to retrieve the actual exponent.
209 typedef FloatingPoint<T> Traits;
210 typedef typename Traits::Bits Bits;
211 Bits bits = BitwiseCast<Bits>(aValue);
212 return int_fast16_t((bits & Traits::kExponentBits) >>
213 Traits::kExponentShift) -
214 int_fast16_t(Traits::kExponentBias);
217 /** Returns +Infinity. */
218 template <typename T>
219 static MOZ_ALWAYS_INLINE T PositiveInfinity() {
221 * Positive infinity has all exponent bits set, sign bit set to 0, and no
222 * significand.
224 typedef FloatingPoint<T> Traits;
225 return BitwiseCast<T>(Traits::kExponentBits);
228 /** Returns -Infinity. */
229 template <typename T>
230 static MOZ_ALWAYS_INLINE T NegativeInfinity() {
232 * Negative infinity has all exponent bits set, sign bit set to 1, and no
233 * significand.
235 typedef FloatingPoint<T> Traits;
236 return BitwiseCast<T>(Traits::kSignBit | Traits::kExponentBits);
240 * Computes the bit pattern for an infinity with the specified sign bit.
242 template <typename T, int SignBit>
243 struct InfinityBits {
244 using Traits = FloatingPoint<T>;
246 static_assert(SignBit == 0 || SignBit == 1, "bad sign bit");
247 static constexpr typename Traits::Bits value =
248 (SignBit * Traits::kSignBit) | Traits::kExponentBits;
252 * Computes the bit pattern for a NaN with the specified sign bit and
253 * significand bits.
255 template <typename T, int SignBit, typename FloatingPoint<T>::Bits Significand>
256 struct SpecificNaNBits {
257 using Traits = FloatingPoint<T>;
259 static_assert(SignBit == 0 || SignBit == 1, "bad sign bit");
260 static_assert((Significand & ~Traits::kSignificandBits) == 0,
261 "significand must only have significand bits set");
262 static_assert(Significand & Traits::kSignificandBits,
263 "significand must be nonzero");
265 static constexpr typename Traits::Bits value =
266 (SignBit * Traits::kSignBit) | Traits::kExponentBits | Significand;
270 * Constructs a NaN value with the specified sign bit and significand bits.
272 * There is also a variant that returns the value directly. In most cases, the
273 * two variants should be identical. However, in the specific case of x86
274 * chips, the behavior differs: returning floating-point values directly is done
275 * through the x87 stack, and x87 loads and stores turn signaling NaNs into
276 * quiet NaNs... silently. Returning floating-point values via outparam,
277 * however, is done entirely within the SSE registers when SSE2 floating-point
278 * is enabled in the compiler, which has semantics-preserving behavior you would
279 * expect.
281 * If preserving the distinction between signaling NaNs and quiet NaNs is
282 * important to you, you should use the outparam version. In all other cases,
283 * you should use the direct return version.
285 template <typename T>
286 static MOZ_ALWAYS_INLINE void SpecificNaN(
287 int signbit, typename FloatingPoint<T>::Bits significand, T* result) {
288 typedef FloatingPoint<T> Traits;
289 MOZ_ASSERT(signbit == 0 || signbit == 1);
290 MOZ_ASSERT((significand & ~Traits::kSignificandBits) == 0);
291 MOZ_ASSERT(significand & Traits::kSignificandBits);
293 BitwiseCast<T>(
294 (signbit ? Traits::kSignBit : 0) | Traits::kExponentBits | significand,
295 result);
296 MOZ_ASSERT(std::isnan(*result));
299 template <typename T>
300 static MOZ_ALWAYS_INLINE T
301 SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand) {
302 T t;
303 SpecificNaN(signbit, significand, &t);
304 return t;
307 /** Computes the smallest non-zero positive float/double value. */
308 template <typename T>
309 static MOZ_ALWAYS_INLINE T MinNumberValue() {
310 typedef FloatingPoint<T> Traits;
311 typedef typename Traits::Bits Bits;
312 return BitwiseCast<T>(Bits(1));
315 namespace detail {
317 template <typename Float, typename SignedInteger>
318 inline bool NumberEqualsSignedInteger(Float aValue, SignedInteger* aInteger) {
319 static_assert(std::is_same_v<Float, float> || std::is_same_v<Float, double>,
320 "Float must be an IEEE-754 floating point type");
321 static_assert(std::is_signed_v<SignedInteger>,
322 "this algorithm only works for signed types: a different one "
323 "will be required for unsigned types");
324 static_assert(sizeof(SignedInteger) >= sizeof(int),
325 "this function *might* require some finessing for signed types "
326 "subject to integral promotion before it can be used on them");
328 MOZ_MAKE_MEM_UNDEFINED(aInteger, sizeof(*aInteger));
330 // NaNs and infinities are not integers.
331 if (!std::isfinite(aValue)) {
332 return false;
335 // Otherwise do direct comparisons against the minimum/maximum |SignedInteger|
336 // values that can be encoded in |Float|.
338 constexpr SignedInteger MaxIntValue =
339 std::numeric_limits<SignedInteger>::max(); // e.g. INT32_MAX
340 constexpr SignedInteger MinValue =
341 std::numeric_limits<SignedInteger>::min(); // e.g. INT32_MIN
343 static_assert(IsPowerOfTwo(Abs(MinValue)),
344 "MinValue should be is a small power of two, thus exactly "
345 "representable in float/double both");
347 constexpr unsigned SignedIntegerWidth = CHAR_BIT * sizeof(SignedInteger);
348 constexpr unsigned ExponentShift = FloatingPoint<Float>::kExponentShift;
350 // Careful! |MaxIntValue| may not be the maximum |SignedInteger| value that
351 // can be encoded in |Float|. Its |SignedIntegerWidth - 1| bits of precision
352 // may exceed |Float|'s |ExponentShift + 1| bits of precision. If necessary,
353 // compute the maximum |SignedInteger| that fits in |Float| from IEEE-754
354 // first principles. (|MinValue| doesn't have this problem because as a
355 // [relatively] small power of two it's always representable in |Float|.)
357 // Per C++11 [expr.const]p2, unevaluated subexpressions of logical AND/OR and
358 // conditional expressions *may* contain non-constant expressions, without
359 // making the enclosing expression not constexpr. MSVC implements this -- but
360 // it sometimes warns about undefined behavior in unevaluated subexpressions.
361 // This bites us if we initialize |MaxValue| the obvious way including an
362 // |uint64_t(1) << (SignedIntegerWidth - 2 - ExponentShift)| subexpression.
363 // Pull that shift-amount out and give it a not-too-huge value when it's in an
364 // unevaluated subexpression. 🙄
365 constexpr unsigned PrecisionExceededShiftAmount =
366 ExponentShift > SignedIntegerWidth - 1
368 : SignedIntegerWidth - 2 - ExponentShift;
370 constexpr SignedInteger MaxValue =
371 ExponentShift > SignedIntegerWidth - 1
372 ? MaxIntValue
373 : SignedInteger((uint64_t(1) << (SignedIntegerWidth - 1)) -
374 (uint64_t(1) << PrecisionExceededShiftAmount));
376 if (static_cast<Float>(MinValue) <= aValue &&
377 aValue <= static_cast<Float>(MaxValue)) {
378 auto possible = static_cast<SignedInteger>(aValue);
379 if (static_cast<Float>(possible) == aValue) {
380 *aInteger = possible;
381 return true;
385 return false;
388 template <typename Float, typename SignedInteger>
389 inline bool NumberIsSignedInteger(Float aValue, SignedInteger* aInteger) {
390 static_assert(std::is_same_v<Float, float> || std::is_same_v<Float, double>,
391 "Float must be an IEEE-754 floating point type");
392 static_assert(std::is_signed_v<SignedInteger>,
393 "this algorithm only works for signed types: a different one "
394 "will be required for unsigned types");
395 static_assert(sizeof(SignedInteger) >= sizeof(int),
396 "this function *might* require some finessing for signed types "
397 "subject to integral promotion before it can be used on them");
399 MOZ_MAKE_MEM_UNDEFINED(aInteger, sizeof(*aInteger));
401 if (IsNegativeZero(aValue)) {
402 return false;
405 return NumberEqualsSignedInteger(aValue, aInteger);
408 } // namespace detail
411 * If |aValue| is identical to some |int32_t| value, set |*aInt32| to that value
412 * and return true. Otherwise return false, leaving |*aInt32| in an
413 * indeterminate state.
415 * This method returns false for negative zero. If you want to consider -0 to
416 * be 0, use NumberEqualsInt32 below.
418 template <typename T>
419 static MOZ_ALWAYS_INLINE bool NumberIsInt32(T aValue, int32_t* aInt32) {
420 return detail::NumberIsSignedInteger(aValue, aInt32);
424 * If |aValue| is identical to some |int64_t| value, set |*aInt64| to that value
425 * and return true. Otherwise return false, leaving |*aInt64| in an
426 * indeterminate state.
428 * This method returns false for negative zero. If you want to consider -0 to
429 * be 0, use NumberEqualsInt64 below.
431 template <typename T>
432 static MOZ_ALWAYS_INLINE bool NumberIsInt64(T aValue, int64_t* aInt64) {
433 return detail::NumberIsSignedInteger(aValue, aInt64);
437 * If |aValue| is equal to some int32_t value (where -0 and +0 are considered
438 * equal), set |*aInt32| to that value and return true. Otherwise return false,
439 * leaving |*aInt32| in an indeterminate state.
441 * |NumberEqualsInt32(-0.0, ...)| will return true. To test whether a value can
442 * be losslessly converted to |int32_t| and back, use NumberIsInt32 above.
444 template <typename T>
445 static MOZ_ALWAYS_INLINE bool NumberEqualsInt32(T aValue, int32_t* aInt32) {
446 return detail::NumberEqualsSignedInteger(aValue, aInt32);
450 * If |aValue| is equal to some int64_t value (where -0 and +0 are considered
451 * equal), set |*aInt64| to that value and return true. Otherwise return false,
452 * leaving |*aInt64| in an indeterminate state.
454 * |NumberEqualsInt64(-0.0, ...)| will return true. To test whether a value can
455 * be losslessly converted to |int64_t| and back, use NumberIsInt64 above.
457 template <typename T>
458 static MOZ_ALWAYS_INLINE bool NumberEqualsInt64(T aValue, int64_t* aInt64) {
459 return detail::NumberEqualsSignedInteger(aValue, aInt64);
463 * Computes a NaN value. Do not use this method if you depend upon a particular
464 * NaN value being returned.
466 template <typename T>
467 static MOZ_ALWAYS_INLINE T UnspecifiedNaN() {
469 * If we can use any quiet NaN, we might as well use the all-ones NaN,
470 * since it's cheap to materialize on common platforms (such as x64, where
471 * this value can be represented in a 32-bit signed immediate field, allowing
472 * it to be stored to memory in a single instruction).
474 typedef FloatingPoint<T> Traits;
475 return SpecificNaN<T>(1, Traits::kSignificandBits);
479 * Compare two doubles for equality, *without* equating -0 to +0, and equating
480 * any NaN value to any other NaN value. (The normal equality operators equate
481 * -0 with +0, and they equate NaN to no other value.)
483 template <typename T>
484 static inline bool NumbersAreIdentical(T aValue1, T aValue2) {
485 using Bits = typename FloatingPoint<T>::Bits;
486 if (std::isnan(aValue1)) {
487 return std::isnan(aValue2);
489 return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2);
493 * Compare two floating point values for bit-wise equality.
495 template <typename T>
496 static inline bool NumbersAreBitwiseIdentical(T aValue1, T aValue2) {
497 using Bits = typename FloatingPoint<T>::Bits;
498 return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2);
/**
 * Returns true iff |aValue1| and |aValue2| are equal (ignoring sign if both
 * are zero) or both NaN.
 */
template <typename T>
static inline bool EqualOrBothNaN(T aValue1, T aValue2) {
  // A NaN on the left only matches a NaN on the right; otherwise defer to ==,
  // which is false whenever the right-hand side is NaN.
  return std::isnan(aValue1) ? std::isnan(aValue2) : aValue1 == aValue2;
}
514 * Return NaN if either |aValue1| or |aValue2| is NaN, or the minimum of
515 * |aValue1| and |aValue2| otherwise.
517 template <typename T>
518 static inline T NaNSafeMin(T aValue1, T aValue2) {
519 if (std::isnan(aValue1) || std::isnan(aValue2)) {
520 return UnspecifiedNaN<T>();
522 return std::min(aValue1, aValue2);
526 * Return NaN if either |aValue1| or |aValue2| is NaN, or the maximum of
527 * |aValue1| and |aValue2| otherwise.
529 template <typename T>
530 static inline T NaNSafeMax(T aValue1, T aValue2) {
531 if (std::isnan(aValue1) || std::isnan(aValue2)) {
532 return UnspecifiedNaN<T>();
534 return std::max(aValue1, aValue2);
namespace detail {

/**
 * Supplies, via |value()|, the default tolerance for comparing values of type
 * T approximately.  Only the float and double specializations are defined.
 * |value()| is constexpr so the tolerance can also be used in constant
 * expressions.
 */
template <typename T>
struct FuzzyEqualsEpsilon;

template <>
struct FuzzyEqualsEpsilon<float> {
  // A number near 1e-5 that is exactly representable in a float: 2^-17.
  static constexpr float value() { return 1.0f / (1 << 17); }
};

template <>
struct FuzzyEqualsEpsilon<double> {
  // A number near 1e-12 that is exactly representable in a double: 2^-40.
  static constexpr double value() { return 1.0 / (1LL << 40); }
};

}  // namespace detail
557 * Compare two floating point values for equality, modulo rounding error. That
558 * is, the two values are considered equal if they are both not NaN and if they
559 * are less than or equal to aEpsilon apart. The default value of aEpsilon is
560 * near 1e-5.
562 * For most scenarios you will want to use FuzzyEqualsMultiplicative instead,
563 * as it is more reasonable over the entire range of floating point numbers.
564 * This additive version should only be used if you know the range of the
565 * numbers you are dealing with is bounded and stays around the same order of
566 * magnitude.
568 template <typename T>
569 static MOZ_ALWAYS_INLINE bool FuzzyEqualsAdditive(
570 T aValue1, T aValue2, T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value()) {
571 static_assert(std::is_floating_point_v<T>, "floating point type required");
572 return Abs(aValue1 - aValue2) <= aEpsilon;
576 * Compare two floating point values for equality, allowing for rounding error
577 * relative to the magnitude of the values. That is, the two values are
578 * considered equal if they are both not NaN and they are less than or equal to
579 * some aEpsilon apart, where the aEpsilon is scaled by the smaller of the two
580 * argument values.
582 * In most cases you will want to use this rather than FuzzyEqualsAdditive, as
583 * this function effectively masks out differences in the bottom few bits of
584 * the floating point numbers being compared, regardless of what order of
585 * magnitude those numbers are at.
587 template <typename T>
588 static MOZ_ALWAYS_INLINE bool FuzzyEqualsMultiplicative(
589 T aValue1, T aValue2, T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value()) {
590 static_assert(std::is_floating_point_v<T>, "floating point type required");
591 // can't use std::min because of bug 965340
592 T smaller = Abs(aValue1) < Abs(aValue2) ? Abs(aValue1) : Abs(aValue2);
593 return Abs(aValue1 - aValue2) <= aEpsilon * smaller;
/**
 * Returns true if |aValue| can be losslessly represented as an IEEE-754 single
 * precision number, false otherwise.  All NaN values are considered
 * representable (even though the bit patterns of double precision NaNs can't
 * all be exactly represented in single precision).
 *
 * This is a declaration only; no definition appears in this header.
 */
[[nodiscard]] extern MFBT_API bool IsFloat32Representable(double aValue);
604 } /* namespace mozilla */
606 #endif /* mozilla_FloatingPoint_h */