mfbt/FloatingPoint.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /* Various predicates and operations on IEEE-754 floating point types. */
   8
   9 #ifndef mozilla_FloatingPoint_h
  10 #define mozilla_FloatingPoint_h
  11
  12 #include "mozilla/Assertions.h"
  13 #include "mozilla/Attributes.h"
  14 #include "mozilla/Casting.h"
  15 #include "mozilla/MathAlgorithms.h"
  16 #include "mozilla/Types.h"
  17
  18 #include <stdint.h>
  19
  20 namespace mozilla {
  21
  22 /*
  23  * It's reasonable to ask why we have this header at all.  Don't isnan,
  24  * copysign, the built-in comparison operators, and the like solve these
  25  * problems?  Unfortunately, they don't.  We've found that various compilers
  26  * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile
  27  * the standard methods in various situations, so we can't use them.  Some of
  28  * these compilers even have problems compiling seemingly reasonable bitwise
  29  * algorithms!  But with some care we've found algorithms that seem to not
  30  * trigger those compiler bugs.
  31  *
  32  * For the aforementioned reasons, be very wary of making changes to any of
  33  * these algorithms.  If you must make changes, keep a careful eye out for
  34  * compiler bustage, particularly PGO-specific bustage.
  35  */
  36
  37 struct FloatTypeTraits
  38 {
  39   typedef uint32_t Bits;
  40
  41   static const unsigned kExponentBias = 127;
  42   static const unsigned kExponentShift = 23;
  43
  44   static const Bits kSignBit         = 0x80000000UL;
  45   static const Bits kExponentBits    = 0x7F800000UL;
  46   static const Bits kSignificandBits = 0x007FFFFFUL;
  47 };
  48
  49 struct DoubleTypeTraits
  50 {
  51   typedef uint64_t Bits;
  52
  53   static const unsigned kExponentBias = 1023;
  54   static const unsigned kExponentShift = 52;
  55
  56   static const Bits kSignBit         = 0x8000000000000000ULL;
  57   static const Bits kExponentBits    = 0x7ff0000000000000ULL;
  58   static const Bits kSignificandBits = 0x000fffffffffffffULL;
  59 };
  60
  61 template<typename T> struct SelectTrait;
  62 template<> struct SelectTrait<float> : public FloatTypeTraits {};
  63 template<> struct SelectTrait<double> : public DoubleTypeTraits {};
  64
  65 /*
  66  *  This struct contains details regarding the encoding of floating-point
  67  *  numbers that can be useful for direct bit manipulation. As of now, the
  68  *  template parameter has to be float or double.
  69  *
  70  *  The nested typedef |Bits| is the unsigned integral type with the same size
  71  *  as T: uint32_t for float and uint64_t for double (static assertions
  72  *  double-check these assumptions).
  73  *
  74  *  kExponentBias is the offset that is subtracted from the exponent when
  75  *  computing the value, i.e. one plus the opposite of the mininum possible
  76  *  exponent.
  77  *  kExponentShift is the shift that one needs to apply to retrieve the
  78  *  exponent component of the value.
  79  *
  80  *  kSignBit contains a bits mask. Bit-and-ing with this mask will result in
  81  *  obtaining the sign bit.
  82  *  kExponentBits contains the mask needed for obtaining the exponent bits and
  83  *  kSignificandBits contains the mask needed for obtaining the significand
  84  *  bits.
  85  *
  86  *  Full details of how floating point number formats are encoded are beyond
  87  *  the scope of this comment. For more information, see
  88  *  http://en.wikipedia.org/wiki/IEEE_floating_point
  89  *  http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers
  90  */
  91 template<typename T>
  92 struct FloatingPoint : public SelectTrait<T>
  93 {
  94   typedef SelectTrait<T> Base;
  95   typedef typename Base::Bits Bits;
  96
  97   static_assert((Base::kSignBit & Base::kExponentBits) == 0,
  98                 "sign bit shouldn't overlap exponent bits");
  99   static_assert((Base::kSignBit & Base::kSignificandBits) == 0,
 100                 "sign bit shouldn't overlap significand bits");
 101   static_assert((Base::kExponentBits & Base::kSignificandBits) == 0,
 102                 "exponent bits shouldn't overlap significand bits");
 103
 104   static_assert((Base::kSignBit | Base::kExponentBits | Base::kSignificandBits) ==
 105                 ~Bits(0),
 106                 "all bits accounted for");
 107
 108   /*
 109    * These implementations assume float/double are 32/64-bit single/double
 110    * format number types compatible with the IEEE-754 standard.  C++ don't
 111    * require this to be the case.  But we required this in implementations of
 112    * these algorithms that preceded this header, so we shouldn't break anything
 113    * if we keep doing so.
 114    */
 115   static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T");
 116 };
 117
 118 /** Determines whether a double is NaN. */
 119 template<typename T>
 120 static MOZ_ALWAYS_INLINE bool
 121 IsNaN(T aValue)
 122 {
 123   /*
 124    * A float/double is NaN if all exponent bits are 1 and the significand
 125    * contains at least one non-zero bit.
 126    */
 127   typedef FloatingPoint<T> Traits;
 128   typedef typename Traits::Bits Bits;
 129   Bits bits = BitwiseCast<Bits>(aValue);
 130   return (bits & Traits::kExponentBits) == Traits::kExponentBits &&
 131          (bits & Traits::kSignificandBits) != 0;
 132 }
 133
 134 /** Determines whether a float/double is +Infinity or -Infinity. */
 135 template<typename T>
 136 static MOZ_ALWAYS_INLINE bool
 137 IsInfinite(T aValue)
 138 {
 139   /* Infinities have all exponent bits set to 1 and an all-0 significand. */
 140   typedef FloatingPoint<T> Traits;
 141   typedef typename Traits::Bits Bits;
 142   Bits bits = BitwiseCast<Bits>(aValue);
 143   return (bits & ~Traits::kSignBit) == Traits::kExponentBits;
 144 }
 145
 146 /** Determines whether a float/double is not NaN or infinite. */
 147 template<typename T>
 148 static MOZ_ALWAYS_INLINE bool
 149 IsFinite(T aValue)
 150 {
 151   /*
 152    * NaN and Infinities are the only non-finite floats/doubles, and both have
 153    * all exponent bits set to 1.
 154    */
 155   typedef FloatingPoint<T> Traits;
 156   typedef typename Traits::Bits Bits;
 157   Bits bits = BitwiseCast<Bits>(aValue);
 158   return (bits & Traits::kExponentBits) != Traits::kExponentBits;
 159 }
 160
 161 /**
 162  * Determines whether a float/double is negative.  It is an error to call this
 163  * method on a float/double which is NaN.
 164  */
 165 template<typename T>
 166 static MOZ_ALWAYS_INLINE bool
 167 IsNegative(T aValue)
 168 {
 169   MOZ_ASSERT(!IsNaN(aValue), "NaN does not have a sign");
 170
 171   /* The sign bit is set if the double is negative. */
 172   typedef FloatingPoint<T> Traits;
 173   typedef typename Traits::Bits Bits;
 174   Bits bits = BitwiseCast<Bits>(aValue);
 175   return (bits & Traits::kSignBit) != 0;
 176 }
 177
 178 /** Determines whether a float/double represents -0. */
 179 template<typename T>
 180 static MOZ_ALWAYS_INLINE bool
 181 IsNegativeZero(T aValue)
 182 {
 183   /* Only the sign bit is set if the value is -0. */
 184   typedef FloatingPoint<T> Traits;
 185   typedef typename Traits::Bits Bits;
 186   Bits bits = BitwiseCast<Bits>(aValue);
 187   return bits == Traits::kSignBit;
 188 }
 189
 190 /**
 191  * Returns 0 if a float/double is NaN or infinite;
 192  * otherwise, the float/double is returned.
 193  */
 194 template<typename T>
 195 static MOZ_ALWAYS_INLINE T
 196 ToZeroIfNonfinite(T aValue)
 197 {
 198   return IsFinite(aValue) ? aValue : 0;
 199 }
 200
 201 /**
 202  * Returns the exponent portion of the float/double.
 203  *
 204  * Zero is not special-cased, so ExponentComponent(0.0) is
 205  * -int_fast16_t(Traits::kExponentBias).
 206  */
 207 template<typename T>
 208 static MOZ_ALWAYS_INLINE int_fast16_t
 209 ExponentComponent(T aValue)
 210 {
 211   /*
 212    * The exponent component of a float/double is an unsigned number, biased
 213    * from its actual value.  Subtract the bias to retrieve the actual exponent.
 214    */
 215   typedef FloatingPoint<T> Traits;
 216   typedef typename Traits::Bits Bits;
 217   Bits bits = BitwiseCast<Bits>(aValue);
 218   return int_fast16_t((bits & Traits::kExponentBits) >> Traits::kExponentShift) -
 219          int_fast16_t(Traits::kExponentBias);
 220 }
 221
 222 /** Returns +Infinity. */
 223 template<typename T>
 224 static MOZ_ALWAYS_INLINE T
 225 PositiveInfinity()
 226 {
 227   /*
 228    * Positive infinity has all exponent bits set, sign bit set to 0, and no
 229    * significand.
 230    */
 231   typedef FloatingPoint<T> Traits;
 232   return BitwiseCast<T>(Traits::kExponentBits);
 233 }
 234
 235 /** Returns -Infinity. */
 236 template<typename T>
 237 static MOZ_ALWAYS_INLINE T
 238 NegativeInfinity()
 239 {
 240   /*
 241    * Negative infinity has all exponent bits set, sign bit set to 1, and no
 242    * significand.
 243    */
 244   typedef FloatingPoint<T> Traits;
 245   return BitwiseCast<T>(Traits::kSignBit | Traits::kExponentBits);
 246 }
 247
 248
 249 /** Constructs a NaN value with the specified sign bit and significand bits. */
 250 template<typename T>
 251 static MOZ_ALWAYS_INLINE T
 252 SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand)
 253 {
 254   typedef FloatingPoint<T> Traits;
 255   MOZ_ASSERT(signbit == 0 || signbit == 1);
 256   MOZ_ASSERT((significand & ~Traits::kSignificandBits) == 0);
 257   MOZ_ASSERT(significand & Traits::kSignificandBits);
 258
 259   T t = BitwiseCast<T>((signbit ? Traits::kSignBit : 0) |
 260                        Traits::kExponentBits |
 261                        significand);
 262   MOZ_ASSERT(IsNaN(t));
 263   return t;
 264 }
 265
 266 /** Computes the smallest non-zero positive float/double value. */
 267 template<typename T>
 268 static MOZ_ALWAYS_INLINE T
 269 MinNumberValue()
 270 {
 271   typedef FloatingPoint<T> Traits;
 272   typedef typename Traits::Bits Bits;
 273   return BitwiseCast<T>(Bits(1));
 274 }
 275
 276 /**
 277  * If aValue is equal to some int32_t value, set *aInt32 to that value and
 278  * return true; otherwise return false.
 279  *
 280  * Note that negative zero is "equal" to zero here. To test whether a value can
 281  * be losslessly converted to int32_t and back, use NumberIsInt32 instead.
 282  */
 283 template<typename T>
 284 static MOZ_ALWAYS_INLINE bool
 285 NumberEqualsInt32(T aValue, int32_t* aInt32)
 286 {
 287   /*
 288    * XXX Casting a floating-point value that doesn't truncate to int32_t, to
 289    *     int32_t, induces undefined behavior.  We should definitely fix this
 290    *     (bug 744965), but as apparently it "works" in practice, it's not a
 291    *     pressing concern now.
 292    */
 293   return aValue == (*aInt32 = int32_t(aValue));
 294 }
 295
 296 /**
 297  * If d can be converted to int32_t and back to an identical double value,
 298  * set *aInt32 to that value and return true; otherwise return false.
 299  *
 300  * The difference between this and NumberEqualsInt32 is that this method returns
 301  * false for negative zero.
 302  */
 303 template<typename T>
 304 static MOZ_ALWAYS_INLINE bool
 305 NumberIsInt32(T aValue, int32_t* aInt32)
 306 {
 307   return !IsNegativeZero(aValue) && NumberEqualsInt32(aValue, aInt32);
 308 }
 309
 310 /**
 311  * Computes a NaN value.  Do not use this method if you depend upon a particular
 312  * NaN value being returned.
 313  */
 314 template<typename T>
 315 static MOZ_ALWAYS_INLINE T
 316 UnspecifiedNaN()
 317 {
 318   /*
 319    * If we can use any quiet NaN, we might as well use the all-ones NaN,
 320    * since it's cheap to materialize on common platforms (such as x64, where
 321    * this value can be represented in a 32-bit signed immediate field, allowing
 322    * it to be stored to memory in a single instruction).
 323    */
 324   typedef FloatingPoint<T> Traits;
 325   return SpecificNaN<T>(1, Traits::kSignificandBits);
 326 }
 327
 328 /**
 329  * Compare two doubles for equality, *without* equating -0 to +0, and equating
 330  * any NaN value to any other NaN value.  (The normal equality operators equate
 331  * -0 with +0, and they equate NaN to no other value.)
 332  */
 333 template<typename T>
 334 static inline bool
 335 NumbersAreIdentical(T aValue1, T aValue2)
 336 {
 337   typedef FloatingPoint<T> Traits;
 338   typedef typename Traits::Bits Bits;
 339   if (IsNaN(aValue1)) {
 340     return IsNaN(aValue2);
 341   }
 342   return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2);
 343 }
 344
 345 namespace detail {
 346
 347 template<typename T>
 348 struct FuzzyEqualsEpsilon;
 349
 350 template<>
 351 struct FuzzyEqualsEpsilon<float>
 352 {
 353   // A number near 1e-5 that is exactly representable in a float.
 354   static float value() { return 1.0f / (1 << 17); }
 355 };
 356
 357 template<>
 358 struct FuzzyEqualsEpsilon<double>
 359 {
 360   // A number near 1e-12 that is exactly representable in a double.
 361   static double value() { return 1.0 / (1LL << 40); }
 362 };
 363
 364 } // namespace detail
 365
 366 /**
 367  * Compare two floating point values for equality, modulo rounding error. That
 368  * is, the two values are considered equal if they are both not NaN and if they
 369  * are less than or equal to aEpsilon apart. The default value of aEpsilon is
 370  * near 1e-5.
 371  *
 372  * For most scenarios you will want to use FuzzyEqualsMultiplicative instead,
 373  * as it is more reasonable over the entire range of floating point numbers.
 374  * This additive version should only be used if you know the range of the
 375  * numbers you are dealing with is bounded and stays around the same order of
 376  * magnitude.
 377  */
 378 template<typename T>
 379 static MOZ_ALWAYS_INLINE bool
 380 FuzzyEqualsAdditive(T aValue1, T aValue2,
 381                     T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
 382 {
 383   static_assert(IsFloatingPoint<T>::value, "floating point type required");
 384   return Abs(aValue1 - aValue2) <= aEpsilon;
 385 }
 386
 387 /**
 388  * Compare two floating point values for equality, allowing for rounding error
 389  * relative to the magnitude of the values. That is, the two values are
 390  * considered equal if they are both not NaN and they are less than or equal to
 391  * some aEpsilon apart, where the aEpsilon is scaled by the smaller of the two
 392  * argument values.
 393  *
 394  * In most cases you will want to use this rather than FuzzyEqualsAdditive, as
 395  * this function effectively masks out differences in the bottom few bits of
 396  * the floating point numbers being compared, regardless of what order of
 397  * magnitude those numbers are at.
 398  */
 399 template<typename T>
 400 static MOZ_ALWAYS_INLINE bool
 401 FuzzyEqualsMultiplicative(T aValue1, T aValue2,
 402                           T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
 403 {
 404   static_assert(IsFloatingPoint<T>::value, "floating point type required");
 405   // can't use std::min because of bug 965340
 406   T smaller = Abs(aValue1) < Abs(aValue2) ? Abs(aValue1) : Abs(aValue2);
 407   return Abs(aValue1 - aValue2) <= aEpsilon * smaller;
 408 }
 409
/**
 * Returns true if the given value can be losslessly represented as an IEEE-754
 * single format number, false otherwise.  All NaN values are considered
 * representable (notwithstanding that the exact bit pattern of a double format
 * NaN value can't be exactly represented in single format).
 *
 * Note that, despite its name, the aFloat32 parameter is the double value
 * being tested, not a float.
 *
 * Only the declaration lives in this header; the definition is out of line.
 * This function isn't inlined to avoid buggy optimizations by MSVC.
 */
MOZ_WARN_UNUSED_RESULT
extern MFBT_API bool
IsFloat32Representable(double aFloat32);
 421
 422 } /* namespace mozilla */
 423
 424 #endif /* mozilla_FloatingPoint_h */