b2gdroid confvars.sh change for 2.5 merge r+a=me
[gecko.git] / mfbt / FloatingPoint.h
blob279363842cf27fad09ef5fe9d084b204f2f1eb9e
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* Various predicates and operations on IEEE-754 floating point types. */
9 #ifndef mozilla_FloatingPoint_h
10 #define mozilla_FloatingPoint_h
12 #include "mozilla/Assertions.h"
13 #include "mozilla/Attributes.h"
14 #include "mozilla/Casting.h"
15 #include "mozilla/MathAlgorithms.h"
16 #include "mozilla/Types.h"
18 #include <stdint.h>
20 namespace mozilla {
23 * It's reasonable to ask why we have this header at all. Don't isnan,
24 * copysign, the built-in comparison operators, and the like solve these
25 * problems? Unfortunately, they don't. We've found that various compilers
26 * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile
27 * the standard methods in various situations, so we can't use them. Some of
28 * these compilers even have problems compiling seemingly reasonable bitwise
29 * algorithms! But with some care we've found algorithms that seem to not
30 * trigger those compiler bugs.
32 * For the aforementioned reasons, be very wary of making changes to any of
33 * these algorithms. If you must make changes, keep a careful eye out for
34 * compiler bustage, particularly PGO-specific bustage.
/** Bit-layout constants for the 32-bit single format (float). */
struct FloatTypeTraits
{
  /* Unsigned integer type used to hold the raw bits of a float. */
  typedef uint32_t Bits;

  /* Offset subtracted from the stored exponent to obtain the real exponent. */
  static const unsigned kExponentBias = 127;
  /* Right shift that brings the exponent field down to bit 0. */
  static const unsigned kExponentShift = 23;

  /* Masks selecting, respectively, the sign, exponent and significand bits. */
  static const Bits kSignBit = 0x80000000UL;
  static const Bits kExponentBits = 0x7F800000UL;
  static const Bits kSignificandBits = 0x007FFFFFUL;
};
/** Bit-layout constants for the 64-bit double format (double). */
struct DoubleTypeTraits
{
  /* Unsigned integer type used to hold the raw bits of a double. */
  typedef uint64_t Bits;

  /* Offset subtracted from the stored exponent to obtain the real exponent. */
  static const unsigned kExponentBias = 1023;
  /* Right shift that brings the exponent field down to bit 0. */
  static const unsigned kExponentShift = 52;

  /* Masks selecting, respectively, the sign, exponent and significand bits. */
  static const Bits kSignBit = 0x8000000000000000ULL;
  static const Bits kExponentBits = 0x7ff0000000000000ULL;
  static const Bits kSignificandBits = 0x000fffffffffffffULL;
};
61 template<typename T> struct SelectTrait;
62 template<> struct SelectTrait<float> : public FloatTypeTraits {};
63 template<> struct SelectTrait<double> : public DoubleTypeTraits {};
66 * This struct contains details regarding the encoding of floating-point
67 * numbers that can be useful for direct bit manipulation. As of now, the
68 * template parameter has to be float or double.
70 * The nested typedef |Bits| is the unsigned integral type with the same size
71 * as T: uint32_t for float and uint64_t for double (static assertions
72 * double-check these assumptions).
74 * kExponentBias is the offset that is subtracted from the exponent when
75 * computing the value, i.e. one plus the opposite of the minimum possible
76 * exponent.
77 * kExponentShift is the shift that one needs to apply to retrieve the
78 * exponent component of the value.
80 * kSignBit contains a bits mask. Bit-and-ing with this mask will result in
81 * obtaining the sign bit.
82 * kExponentBits contains the mask needed for obtaining the exponent bits and
83 * kSignificandBits contains the mask needed for obtaining the significand
84 * bits.
86 * Full details of how floating point number formats are encoded are beyond
87 * the scope of this comment. For more information, see
88 * http://en.wikipedia.org/wiki/IEEE_floating_point
89 * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers
91 template<typename T>
92 struct FloatingPoint : public SelectTrait<T>
94 typedef SelectTrait<T> Base;
95 typedef typename Base::Bits Bits;
97 static_assert((Base::kSignBit & Base::kExponentBits) == 0,
98 "sign bit shouldn't overlap exponent bits");
99 static_assert((Base::kSignBit & Base::kSignificandBits) == 0,
100 "sign bit shouldn't overlap significand bits");
101 static_assert((Base::kExponentBits & Base::kSignificandBits) == 0,
102 "exponent bits shouldn't overlap significand bits");
104 static_assert((Base::kSignBit | Base::kExponentBits | Base::kSignificandBits) ==
105 ~Bits(0),
106 "all bits accounted for");
109 * These implementations assume float/double are 32/64-bit single/double
110 * format number types compatible with the IEEE-754 standard. C++ don't
111 * require this to be the case. But we required this in implementations of
112 * these algorithms that preceded this header, so we shouldn't break anything
113 * if we keep doing so.
115 static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T");
118 /** Determines whether a float/double is NaN. */
119 template<typename T>
120 static MOZ_ALWAYS_INLINE MOZ_CONSTEXPR bool
121 IsNaN(T aValue)
124 * A float/double is NaN if all exponent bits are 1 and the significand
125 * contains at least one non-zero bit.
127 typedef FloatingPoint<T> Traits;
128 typedef typename Traits::Bits Bits;
129 return (BitwiseCast<Bits>(aValue) & Traits::kExponentBits) == Traits::kExponentBits &&
130 (BitwiseCast<Bits>(aValue) & Traits::kSignificandBits) != 0;
133 /** Determines whether a float/double is +Infinity or -Infinity. */
134 template<typename T>
135 static MOZ_ALWAYS_INLINE bool
136 IsInfinite(T aValue)
138 /* Infinities have all exponent bits set to 1 and an all-0 significand. */
139 typedef FloatingPoint<T> Traits;
140 typedef typename Traits::Bits Bits;
141 Bits bits = BitwiseCast<Bits>(aValue);
142 return (bits & ~Traits::kSignBit) == Traits::kExponentBits;
145 /** Determines whether a float/double is not NaN or infinite. */
146 template<typename T>
147 static MOZ_ALWAYS_INLINE bool
148 IsFinite(T aValue)
151 * NaN and Infinities are the only non-finite floats/doubles, and both have
152 * all exponent bits set to 1.
154 typedef FloatingPoint<T> Traits;
155 typedef typename Traits::Bits Bits;
156 Bits bits = BitwiseCast<Bits>(aValue);
157 return (bits & Traits::kExponentBits) != Traits::kExponentBits;
161 * Determines whether a float/double is negative or -0. It is an error
162 * to call this method on a float/double which is NaN.
164 template<typename T>
165 static MOZ_ALWAYS_INLINE bool
166 IsNegative(T aValue)
168 MOZ_ASSERT(!IsNaN(aValue), "NaN does not have a sign");
170 /* The sign bit is set if the double is negative. */
171 typedef FloatingPoint<T> Traits;
172 typedef typename Traits::Bits Bits;
173 Bits bits = BitwiseCast<Bits>(aValue);
174 return (bits & Traits::kSignBit) != 0;
177 /** Determines whether a float/double represents -0. */
178 template<typename T>
179 static MOZ_ALWAYS_INLINE bool
180 IsNegativeZero(T aValue)
182 /* Only the sign bit is set if the value is -0. */
183 typedef FloatingPoint<T> Traits;
184 typedef typename Traits::Bits Bits;
185 Bits bits = BitwiseCast<Bits>(aValue);
186 return bits == Traits::kSignBit;
190 * Returns 0 if a float/double is NaN or infinite;
191 * otherwise, the float/double is returned.
193 template<typename T>
194 static MOZ_ALWAYS_INLINE T
195 ToZeroIfNonfinite(T aValue)
197 return IsFinite(aValue) ? aValue : 0;
201 * Returns the exponent portion of the float/double.
203 * Zero is not special-cased, so ExponentComponent(0.0) is
204 * -int_fast16_t(Traits::kExponentBias).
206 template<typename T>
207 static MOZ_ALWAYS_INLINE int_fast16_t
208 ExponentComponent(T aValue)
211 * The exponent component of a float/double is an unsigned number, biased
212 * from its actual value. Subtract the bias to retrieve the actual exponent.
214 typedef FloatingPoint<T> Traits;
215 typedef typename Traits::Bits Bits;
216 Bits bits = BitwiseCast<Bits>(aValue);
217 return int_fast16_t((bits & Traits::kExponentBits) >> Traits::kExponentShift) -
218 int_fast16_t(Traits::kExponentBias);
221 /** Returns +Infinity. */
222 template<typename T>
223 static MOZ_ALWAYS_INLINE T
224 PositiveInfinity()
227 * Positive infinity has all exponent bits set, sign bit set to 0, and no
228 * significand.
230 typedef FloatingPoint<T> Traits;
231 return BitwiseCast<T>(Traits::kExponentBits);
234 /** Returns -Infinity. */
235 template<typename T>
236 static MOZ_ALWAYS_INLINE T
237 NegativeInfinity()
240 * Negative infinity has all exponent bits set, sign bit set to 1, and no
241 * significand.
243 typedef FloatingPoint<T> Traits;
244 return BitwiseCast<T>(Traits::kSignBit | Traits::kExponentBits);
248 /** Constructs a NaN value with the specified sign bit and significand bits. */
249 template<typename T>
250 static MOZ_ALWAYS_INLINE T
251 SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand)
253 typedef FloatingPoint<T> Traits;
254 MOZ_ASSERT(signbit == 0 || signbit == 1);
255 MOZ_ASSERT((significand & ~Traits::kSignificandBits) == 0);
256 MOZ_ASSERT(significand & Traits::kSignificandBits);
258 T t = BitwiseCast<T>((signbit ? Traits::kSignBit : 0) |
259 Traits::kExponentBits |
260 significand);
261 MOZ_ASSERT(IsNaN(t));
262 return t;
265 /** Computes the smallest non-zero positive float/double value. */
266 template<typename T>
267 static MOZ_ALWAYS_INLINE T
268 MinNumberValue()
270 typedef FloatingPoint<T> Traits;
271 typedef typename Traits::Bits Bits;
272 return BitwiseCast<T>(Bits(1));
276 * If aValue is equal to some int32_t value, set *aInt32 to that value and
277 * return true; otherwise return false.
279 * Note that negative zero is "equal" to zero here. To test whether a value can
280 * be losslessly converted to int32_t and back, use NumberIsInt32 instead.
282 template<typename T>
283 static MOZ_ALWAYS_INLINE bool
284 NumberEqualsInt32(T aValue, int32_t* aInt32)
287 * XXX Casting a floating-point value that doesn't truncate to int32_t, to
288 * int32_t, induces undefined behavior. We should definitely fix this
289 * (bug 744965), but as apparently it "works" in practice, it's not a
290 * pressing concern now.
292 return aValue == (*aInt32 = int32_t(aValue));
296 * If d can be converted to int32_t and back to an identical double value,
297 * set *aInt32 to that value and return true; otherwise return false.
299 * The difference between this and NumberEqualsInt32 is that this method returns
300 * false for negative zero.
302 template<typename T>
303 static MOZ_ALWAYS_INLINE bool
304 NumberIsInt32(T aValue, int32_t* aInt32)
306 return !IsNegativeZero(aValue) && NumberEqualsInt32(aValue, aInt32);
310 * Computes a NaN value. Do not use this method if you depend upon a particular
311 * NaN value being returned.
313 template<typename T>
314 static MOZ_ALWAYS_INLINE T
315 UnspecifiedNaN()
318 * If we can use any quiet NaN, we might as well use the all-ones NaN,
319 * since it's cheap to materialize on common platforms (such as x64, where
320 * this value can be represented in a 32-bit signed immediate field, allowing
321 * it to be stored to memory in a single instruction).
323 typedef FloatingPoint<T> Traits;
324 return SpecificNaN<T>(1, Traits::kSignificandBits);
328 * Compare two doubles for equality, *without* equating -0 to +0, and equating
329 * any NaN value to any other NaN value. (The normal equality operators equate
330 * -0 with +0, and they equate NaN to no other value.)
332 template<typename T>
333 static inline bool
334 NumbersAreIdentical(T aValue1, T aValue2)
336 typedef FloatingPoint<T> Traits;
337 typedef typename Traits::Bits Bits;
338 if (IsNaN(aValue1)) {
339 return IsNaN(aValue2);
341 return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2);
namespace detail {

/* Supplies the default tolerance used by the FuzzyEquals* functions. */
template<typename T>
struct FuzzyEqualsEpsilon;

template<>
struct FuzzyEqualsEpsilon<float>
{
  // 2^-17: a number near 1e-5 that is exactly representable in a float.
  static float value()
  {
    return 1.0f / (1 << 17);
  }
};

template<>
struct FuzzyEqualsEpsilon<double>
{
  // 2^-40: a number near 1e-12 that is exactly representable in a double.
  static double value()
  {
    return 1.0 / (1LL << 40);
  }
};

} // namespace detail
366 * Compare two floating point values for equality, modulo rounding error. That
367 * is, the two values are considered equal if they are both not NaN and if they
368 * are less than or equal to aEpsilon apart. The default value of aEpsilon is
369 * near 1e-5.
371 * For most scenarios you will want to use FuzzyEqualsMultiplicative instead,
372 * as it is more reasonable over the entire range of floating point numbers.
373 * This additive version should only be used if you know the range of the
374 * numbers you are dealing with is bounded and stays around the same order of
375 * magnitude.
377 template<typename T>
378 static MOZ_ALWAYS_INLINE bool
379 FuzzyEqualsAdditive(T aValue1, T aValue2,
380 T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
382 static_assert(IsFloatingPoint<T>::value, "floating point type required");
383 return Abs(aValue1 - aValue2) <= aEpsilon;
387 * Compare two floating point values for equality, allowing for rounding error
388 * relative to the magnitude of the values. That is, the two values are
389 * considered equal if they are both not NaN and they are less than or equal to
390 * some aEpsilon apart, where the aEpsilon is scaled by the smaller of the two
391 * argument values.
393 * In most cases you will want to use this rather than FuzzyEqualsAdditive, as
394 * this function effectively masks out differences in the bottom few bits of
395 * the floating point numbers being compared, regardless of what order of
396 * magnitude those numbers are at.
398 template<typename T>
399 static MOZ_ALWAYS_INLINE bool
400 FuzzyEqualsMultiplicative(T aValue1, T aValue2,
401 T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
403 static_assert(IsFloatingPoint<T>::value, "floating point type required");
405 // Short-circuit the common case in order to avoid the expensive operations
406 // below.
407 if (aValue1 == aValue2) {
408 return true;
411 // can't use std::min because of bug 965340
412 T smaller = Abs(aValue1) < Abs(aValue2) ? Abs(aValue1) : Abs(aValue2);
413 return Abs(aValue1 - aValue2) <= aEpsilon * smaller;
417 * Returns true if the given value can be losslessly represented as an IEEE-754
418 * single format number, false otherwise. All NaN values are considered
419 * representable (notwithstanding that the exact bit pattern of a double format
420 * NaN value can't be exactly represented in single format).
422 * This function isn't inlined to avoid buggy optimizations by MSVC.
424 MOZ_WARN_UNUSED_RESULT
425 extern MFBT_API bool
426 IsFloat32Representable(double aFloat32);
428 } /* namespace mozilla */
430 #endif /* mozilla_FloatingPoint_h */