mfbt/FloatingPoint.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /* Various predicates and operations on IEEE-754 floating point types. */
   8
   9 #ifndef mozilla_FloatingPoint_h
  10 #define mozilla_FloatingPoint_h
  11
  12 #include "mozilla/Assertions.h"
  13 #include "mozilla/Attributes.h"
  14 #include "mozilla/Casting.h"
  15 #include "mozilla/MathAlgorithms.h"
  16 #include "mozilla/Types.h"
  17
  18 #include <stdint.h>
  19
  20 namespace mozilla {
  21
  22 /*
  23  * It's reasonable to ask why we have this header at all.  Don't isnan,
  24  * copysign, the built-in comparison operators, and the like solve these
  25  * problems?  Unfortunately, they don't.  We've found that various compilers
  26  * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile
  27  * the standard methods in various situations, so we can't use them.  Some of
  28  * these compilers even have problems compiling seemingly reasonable bitwise
  29  * algorithms!  But with some care we've found algorithms that seem to not
  30  * trigger those compiler bugs.
  31  *
  32  * For the aforementioned reasons, be very wary of making changes to any of
  33  * these algorithms.  If you must make changes, keep a careful eye out for
  34  * compiler bustage, particularly PGO-specific bustage.
  35  */
  36
  37 struct FloatTypeTraits
  38 {
  39   typedef uint32_t Bits;
  40
  41   static const unsigned kExponentBias = 127;
  42   static const unsigned kExponentShift = 23;
  43
  44   static const Bits kSignBit         = 0x80000000UL;
  45   static const Bits kExponentBits    = 0x7F800000UL;
  46   static const Bits kSignificandBits = 0x007FFFFFUL;
  47 };
  48
  49 struct DoubleTypeTraits
  50 {
  51   typedef uint64_t Bits;
  52
  53   static const unsigned kExponentBias = 1023;
  54   static const unsigned kExponentShift = 52;
  55
  56   static const Bits kSignBit         = 0x8000000000000000ULL;
  57   static const Bits kExponentBits    = 0x7ff0000000000000ULL;
  58   static const Bits kSignificandBits = 0x000fffffffffffffULL;
  59 };
  60
  61 template<typename T> struct SelectTrait;
  62 template<> struct SelectTrait<float> : public FloatTypeTraits {};
  63 template<> struct SelectTrait<double> : public DoubleTypeTraits {};
  64
  65 /*
  66  *  This struct contains details regarding the encoding of floating-point
  67  *  numbers that can be useful for direct bit manipulation. As of now, the
  68  *  template parameter has to be float or double.
  69  *
  70  *  The nested typedef |Bits| is the unsigned integral type with the same size
  71  *  as T: uint32_t for float and uint64_t for double (static assertions
  72  *  double-check these assumptions).
  73  *
  74  *  kExponentBias is the offset that is subtracted from the exponent when
  75  *  computing the value, i.e. one plus the opposite of the mininum possible
  76  *  exponent.
  77  *  kExponentShift is the shift that one needs to apply to retrieve the
  78  *  exponent component of the value.
  79  *
  80  *  kSignBit contains a bits mask. Bit-and-ing with this mask will result in
  81  *  obtaining the sign bit.
  82  *  kExponentBits contains the mask needed for obtaining the exponent bits and
  83  *  kSignificandBits contains the mask needed for obtaining the significand
  84  *  bits.
  85  *
  86  *  Full details of how floating point number formats are encoded are beyond
  87  *  the scope of this comment. For more information, see
  88  *  http://en.wikipedia.org/wiki/IEEE_floating_point
  89  *  http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers
  90  */
  91 template<typename T>
  92 struct FloatingPoint : public SelectTrait<T>
  93 {
  94   typedef SelectTrait<T> Base;
  95   typedef typename Base::Bits Bits;
  96
  97   static_assert((Base::kSignBit & Base::kExponentBits) == 0,
  98                 "sign bit shouldn't overlap exponent bits");
  99   static_assert((Base::kSignBit & Base::kSignificandBits) == 0,
 100                 "sign bit shouldn't overlap significand bits");
 101   static_assert((Base::kExponentBits & Base::kSignificandBits) == 0,
 102                 "exponent bits shouldn't overlap significand bits");
 103
 104   static_assert((Base::kSignBit | Base::kExponentBits | Base::kSignificandBits) ==
 105                 ~Bits(0),
 106                 "all bits accounted for");
 107
 108   /*
 109    * These implementations assume float/double are 32/64-bit single/double
 110    * format number types compatible with the IEEE-754 standard.  C++ don't
 111    * require this to be the case.  But we required this in implementations of
 112    * these algorithms that preceded this header, so we shouldn't break anything
 113    * if we keep doing so.
 114    */
 115   static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T");
 116 };
 117
/**
 * Determines whether a float/double is NaN.
 *
 * Returns true for every NaN encoding, whatever its sign bit and significand
 * bits are, and false for every other value (infinities included).
 */
template<typename T>
static MOZ_ALWAYS_INLINE MOZ_CONSTEXPR bool
IsNaN(T aValue)
{
  /*
   * A float/double is NaN if all exponent bits are 1 and the significand
   * contains at least one non-zero bit.
   *
   * NOTE(review): the cast is written out twice instead of being stored in a
   * local, presumably so that the body remains a single return statement for
   * MOZ_CONSTEXPR -- confirm before restructuring.
   */
  typedef FloatingPoint<T> Traits;
  typedef typename Traits::Bits Bits;
  return (BitwiseCast<Bits>(aValue) & Traits::kExponentBits) == Traits::kExponentBits &&
         (BitwiseCast<Bits>(aValue) & Traits::kSignificandBits) != 0;
}
 132
/**
 * Determines whether a float/double is +Infinity or -Infinity.
 *
 * Returns false for NaN and for every finite value.
 */
template<typename T>
static MOZ_ALWAYS_INLINE bool
IsInfinite(T aValue)
{
  /* Infinities have all exponent bits set to 1 and an all-0 significand. */
  typedef FloatingPoint<T> Traits;
  typedef typename Traits::Bits Bits;
  Bits bits = BitwiseCast<Bits>(aValue);
  /* Mask the sign away so that both infinities compare equal to the mask. */
  return (bits & ~Traits::kSignBit) == Traits::kExponentBits;
}
 144
/**
 * Determines whether a float/double is not NaN or infinite.
 *
 * Returns true for zeroes (of either sign) and for every other value whose
 * exponent bits are not all set; returns false for NaN, +Infinity and
 * -Infinity.
 */
template<typename T>
static MOZ_ALWAYS_INLINE bool
IsFinite(T aValue)
{
  /*
   * NaN and Infinities are the only non-finite floats/doubles, and both have
   * all exponent bits set to 1.
   */
  typedef FloatingPoint<T> Traits;
  typedef typename Traits::Bits Bits;
  Bits bits = BitwiseCast<Bits>(aValue);
  return (bits & Traits::kExponentBits) != Traits::kExponentBits;
}
 159
/**
 * Determines whether a float/double is negative or -0.  It is an error
 * to call this method on a float/double which is NaN.
 *
 * The result is simply the state of the sign bit, so -0 and -Infinity are
 * reported as negative.  The NaN precondition is checked with MOZ_ASSERT.
 */
template<typename T>
static MOZ_ALWAYS_INLINE bool
IsNegative(T aValue)
{
  MOZ_ASSERT(!IsNaN(aValue), "NaN does not have a sign");

  /* The sign bit is set if the float/double is negative. */
  typedef FloatingPoint<T> Traits;
  typedef typename Traits::Bits Bits;
  Bits bits = BitwiseCast<Bits>(aValue);
  return (bits & Traits::kSignBit) != 0;
}
 176
/**
 * Determines whether a float/double represents -0.
 *
 * Unlike a comparison with the == operator, this distinguishes -0 from +0:
 * it returns true for -0 only.
 */
template<typename T>
static MOZ_ALWAYS_INLINE bool
IsNegativeZero(T aValue)
{
  /* Only the sign bit is set if the value is -0. */
  typedef FloatingPoint<T> Traits;
  typedef typename Traits::Bits Bits;
  Bits bits = BitwiseCast<Bits>(aValue);
  return bits == Traits::kSignBit;
}
 188
 189 /**
 190  * Returns 0 if a float/double is NaN or infinite;
 191  * otherwise, the float/double is returned.
 192  */
 193 template<typename T>
 194 static MOZ_ALWAYS_INLINE T
 195 ToZeroIfNonfinite(T aValue)
 196 {
 197   return IsFinite(aValue) ? aValue : 0;
 198 }
 199
/**
 * Returns the exponent portion of the float/double.
 *
 * Zero is not special-cased, so ExponentComponent(0.0) is
 * -int_fast16_t(Traits::kExponentBias).  The same result is produced for any
 * other value whose exponent bits are all zero.  Values whose exponent bits
 * are all set (infinities and NaNs) yield Traits::kExponentBias + 1.
 */
template<typename T>
static MOZ_ALWAYS_INLINE int_fast16_t
ExponentComponent(T aValue)
{
  /*
   * The exponent component of a float/double is an unsigned number, biased
   * from its actual value.  Subtract the bias to retrieve the actual exponent.
   */
  typedef FloatingPoint<T> Traits;
  typedef typename Traits::Bits Bits;
  Bits bits = BitwiseCast<Bits>(aValue);
  /*
   * Both operands are converted to int_fast16_t before subtracting, so the
   * difference is computed on signed values and can be negative.
   */
  return int_fast16_t((bits & Traits::kExponentBits) >> Traits::kExponentShift) -
         int_fast16_t(Traits::kExponentBias);
}
 220
/**
 * Returns +Infinity.
 *
 * T has to be given explicitly, e.g. PositiveInfinity<double>(), because
 * there is no argument to deduce it from.
 */
template<typename T>
static MOZ_ALWAYS_INLINE T
PositiveInfinity()
{
  /*
   * Positive infinity has all exponent bits set, sign bit set to 0, and no
   * significand.
   */
  typedef FloatingPoint<T> Traits;
  return BitwiseCast<T>(Traits::kExponentBits);
}
 233
/**
 * Returns -Infinity.
 *
 * T has to be given explicitly, e.g. NegativeInfinity<double>(), because
 * there is no argument to deduce it from.
 */
template<typename T>
static MOZ_ALWAYS_INLINE T
NegativeInfinity()
{
  /*
   * Negative infinity has all exponent bits set, sign bit set to 1, and no
   * significand.
   */
  typedef FloatingPoint<T> Traits;
  return BitwiseCast<T>(Traits::kSignBit | Traits::kExponentBits);
}
 246
 247
/**
 * Constructs a NaN value with the specified sign bit and significand bits.
 *
 * signbit must be 0 or 1.  significand must be non-zero and must not have any
 * bit set outside FloatingPoint<T>::kSignificandBits.  These requirements,
 * and the fact that the result is a NaN, are checked with MOZ_ASSERT.
 */
template<typename T>
static MOZ_ALWAYS_INLINE T
SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand)
{
  typedef FloatingPoint<T> Traits;
  MOZ_ASSERT(signbit == 0 || signbit == 1);
  MOZ_ASSERT((significand & ~Traits::kSignificandBits) == 0);
  /* An all-zero significand would encode an infinity, not a NaN. */
  MOZ_ASSERT(significand & Traits::kSignificandBits);

  /* All exponent bits set, plus the requested sign and significand. */
  T t = BitwiseCast<T>((signbit ? Traits::kSignBit : 0) |
                       Traits::kExponentBits |
                       significand);
  MOZ_ASSERT(IsNaN(t));
  return t;
}
 264
/**
 * Computes the smallest non-zero positive float/double value.
 *
 * T has to be given explicitly, e.g. MinNumberValue<double>(), because there
 * is no argument to deduce it from.
 */
template<typename T>
static MOZ_ALWAYS_INLINE T
MinNumberValue()
{
  typedef FloatingPoint<T> Traits;
  typedef typename Traits::Bits Bits;
  /*
   * The encoding with only the lowest significand bit set: sign bit 0, all
   * exponent bits 0.
   */
  return BitwiseCast<T>(Bits(1));
}
 274
 275 /**
 276  * If aValue is equal to some int32_t value, set *aInt32 to that value and
 277  * return true; otherwise return false.
 278  *
 279  * Note that negative zero is "equal" to zero here. To test whether a value can
 280  * be losslessly converted to int32_t and back, use NumberIsInt32 instead.
 281  */
 282 template<typename T>
 283 static MOZ_ALWAYS_INLINE bool
 284 NumberEqualsInt32(T aValue, int32_t* aInt32)
 285 {
 286   /*
 287    * XXX Casting a floating-point value that doesn't truncate to int32_t, to
 288    *     int32_t, induces undefined behavior.  We should definitely fix this
 289    *     (bug 744965), but as apparently it "works" in practice, it's not a
 290    *     pressing concern now.
 291    */
 292   return aValue == (*aInt32 = int32_t(aValue));
 293 }
 294
 295 /**
 296  * If d can be converted to int32_t and back to an identical double value,
 297  * set *aInt32 to that value and return true; otherwise return false.
 298  *
 299  * The difference between this and NumberEqualsInt32 is that this method returns
 300  * false for negative zero.
 301  */
 302 template<typename T>
 303 static MOZ_ALWAYS_INLINE bool
 304 NumberIsInt32(T aValue, int32_t* aInt32)
 305 {
 306   return !IsNegativeZero(aValue) && NumberEqualsInt32(aValue, aInt32);
 307 }
 308
 309 /**
 310  * Computes a NaN value.  Do not use this method if you depend upon a particular
 311  * NaN value being returned.
 312  */
 313 template<typename T>
 314 static MOZ_ALWAYS_INLINE T
 315 UnspecifiedNaN()
 316 {
 317   /*
 318    * If we can use any quiet NaN, we might as well use the all-ones NaN,
 319    * since it's cheap to materialize on common platforms (such as x64, where
 320    * this value can be represented in a 32-bit signed immediate field, allowing
 321    * it to be stored to memory in a single instruction).
 322    */
 323   typedef FloatingPoint<T> Traits;
 324   return SpecificNaN<T>(1, Traits::kSignificandBits);
 325 }
 326
/**
 * Compare two floats/doubles for equality, *without* equating -0 to +0, and
 * equating any NaN value to any other NaN value.  (The normal equality
 * operators equate -0 with +0, and they equate NaN to no other value.)
 */
template<typename T>
static inline bool
NumbersAreIdentical(T aValue1, T aValue2)
{
  typedef FloatingPoint<T> Traits;
  typedef typename Traits::Bits Bits;
  /*
   * NaNs are handled first: they have many different encodings, so comparing
   * the bits would not equate all of them.
   */
  if (IsNaN(aValue1)) {
    return IsNaN(aValue2);
  }
  /* Otherwise the values are identical exactly when their encodings are. */
  return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2);
}
 343
 344 namespace detail {
 345
 346 template<typename T>
 347 struct FuzzyEqualsEpsilon;
 348
 349 template<>
 350 struct FuzzyEqualsEpsilon<float>
 351 {
 352   // A number near 1e-5 that is exactly representable in a float.
 353   static float value() { return 1.0f / (1 << 17); }
 354 };
 355
 356 template<>
 357 struct FuzzyEqualsEpsilon<double>
 358 {
 359   // A number near 1e-12 that is exactly representable in a double.
 360   static double value() { return 1.0 / (1LL << 40); }
 361 };
 362
 363 } // namespace detail
 364
 365 /**
 366  * Compare two floating point values for equality, modulo rounding error. That
 367  * is, the two values are considered equal if they are both not NaN and if they
 368  * are less than or equal to aEpsilon apart. The default value of aEpsilon is
 369  * near 1e-5.
 370  *
 371  * For most scenarios you will want to use FuzzyEqualsMultiplicative instead,
 372  * as it is more reasonable over the entire range of floating point numbers.
 373  * This additive version should only be used if you know the range of the
 374  * numbers you are dealing with is bounded and stays around the same order of
 375  * magnitude.
 376  */
 377 template<typename T>
 378 static MOZ_ALWAYS_INLINE bool
 379 FuzzyEqualsAdditive(T aValue1, T aValue2,
 380                     T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
 381 {
 382   static_assert(IsFloatingPoint<T>::value, "floating point type required");
 383   return Abs(aValue1 - aValue2) <= aEpsilon;
 384 }
 385
 386 /**
 387  * Compare two floating point values for equality, allowing for rounding error
 388  * relative to the magnitude of the values. That is, the two values are
 389  * considered equal if they are both not NaN and they are less than or equal to
 390  * some aEpsilon apart, where the aEpsilon is scaled by the smaller of the two
 391  * argument values.
 392  *
 393  * In most cases you will want to use this rather than FuzzyEqualsAdditive, as
 394  * this function effectively masks out differences in the bottom few bits of
 395  * the floating point numbers being compared, regardless of what order of
 396  * magnitude those numbers are at.
 397  */
 398 template<typename T>
 399 static MOZ_ALWAYS_INLINE bool
 400 FuzzyEqualsMultiplicative(T aValue1, T aValue2,
 401                           T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
 402 {
 403   static_assert(IsFloatingPoint<T>::value, "floating point type required");
 404
 405   // Short-circuit the common case in order to avoid the expensive operations
 406   // below.
 407   if (aValue1 == aValue2) {
 408     return true;
 409   }
 410
 411   // can't use std::min because of bug 965340
 412   T smaller = Abs(aValue1) < Abs(aValue2) ? Abs(aValue1) : Abs(aValue2);
 413   return Abs(aValue1 - aValue2) <= aEpsilon * smaller;
 414 }
 415
/**
 * Returns true if the given value can be losslessly represented as an IEEE-754
 * single format number, false otherwise.  All NaN values are considered
 * representable (notwithstanding that the exact bit pattern of a double format
 * NaN value can't be exactly represented in single format).
 *
 * Despite its name, aFloat32 is the double format value being tested.
 *
 * This function isn't inlined to avoid buggy optimizations by MSVC.  Only the
 * declaration lives in this header; the definition is provided elsewhere.
 */
MOZ_WARN_UNUSED_RESULT
extern MFBT_API bool
IsFloat32Representable(double aFloat32);
 427
 428 } /* namespace mozilla */
 429
 430 #endif /* mozilla_FloatingPoint_h */