mfbt/Atomics.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * Implements (almost always) lock-free atomic operations. The operations here
   9  * are a subset of that which can be found in C++11's <atomic> header, with a
  10  * different API to enforce consistent memory ordering constraints.
  11  *
  12  * Anyone caught using |volatile| for inter-thread memory safety needs to be
  13  * sent a copy of this header and the C++11 standard.
  14  */
  15
  16 #ifndef mozilla_Atomics_h
  17 #define mozilla_Atomics_h
  18
  19 #include "mozilla/Assertions.h"
  20 #include "mozilla/Attributes.h"
  21 #include "mozilla/Compiler.h"
  22
  23 #include <atomic>
  24
  25 #include <stdint.h>
  26 #include <type_traits>
  27
  28 namespace mozilla {
  29
  30 /**
  31  * An enum of memory ordering possibilities for atomics.
  32  *
  33  * Memory ordering is the observable state of distinct values in memory.
  34  * (It's a separate concept from atomicity, which concerns whether an
  35  * operation can ever be observed in an intermediate state.  Don't
  36  * conflate the two!)  Given a sequence of operations in source code on
  37  * memory, it is *not* always the case that, at all times and on all
  38  * cores, those operations will appear to have occurred in that exact
  39  * sequence.  First, the compiler might reorder that sequence, if it
  40  * thinks another ordering will be more efficient.  Second, the CPU may
  41  * not expose so consistent a view of memory.  CPUs will often perform
  42  * their own instruction reordering, above and beyond that performed by
  43  * the compiler.  And each core has its own memory caches, and accesses
  44  * (reads and writes both) to "memory" may only resolve to out-of-date
  45  * cache entries -- not to the "most recently" performed operation in
  46  * some global sense.  Any access to a value that may be used by
  47  * multiple threads, potentially across multiple cores, must therefore
  48  * have a memory ordering imposed on it, for all code on all
  49  * threads/cores to have a sufficiently coherent worldview.
  50  *
  51  * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
  52  * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
  53  * detail on all this, including examples of how each mode works.
  54  *
  55  * Note that for simplicity and practicality, not all of the modes in
  56  * C++11 are supported.  The missing C++11 modes are either subsumed by
  57  * the modes we provide below, or not relevant for the CPUs we support
  58  * in Gecko.  These three modes are confusing enough as it is!
  59  */
  60 enum MemoryOrdering {
  61   /*
  62    * Relaxed ordering is the simplest memory ordering: none at all.
  63    * When the result of a write is observed, nothing may be inferred
  64    * about other memory.  Writes ostensibly performed "before" on the
  65    * writing thread may not yet be visible.  Writes performed "after" on
  66    * the writing thread may already be visible, if the compiler or CPU
  67    * reordered them.  (The latter can happen if reads and/or writes get
  68    * held up in per-processor caches.)  Relaxed ordering means
  69    * operations can always use cached values (as long as the actual
  70    * updates to atomic values actually occur, correctly, eventually), so
  71    * it's usually the fastest sort of atomic access.  For this reason,
  72    * *it's also the most dangerous kind of access*.
  73    *
  74    * Relaxed ordering is good for things like process-wide statistics
  75    * counters that don't need to be consistent with anything else, so
  76    * long as updates themselves are atomic.  (And so long as any
  77    * observations of that value can tolerate being out-of-date -- if you
  78    * need some sort of up-to-date value, you need some sort of other
  79    * synchronizing operation.)  It's *not* good for locks, mutexes,
  80    * reference counts, etc. that mediate access to other memory, or must
  81    * be observably consistent with other memory.
  82    *
  83    * x86 architectures don't take advantage of the optimization
  84    * opportunities that relaxed ordering permits.  Thus it's possible
  85    * that using relaxed ordering will "work" on x86 but fail elsewhere
  86    * (ARM, say, which *does* implement non-sequentially-consistent
  87    * relaxed ordering semantics).  Be extra-careful using relaxed
  88    * ordering if you can't easily test non-x86 architectures!
  89    */
  90   Relaxed,
  91
  92   /*
  93    * When an atomic value is updated with ReleaseAcquire ordering, and
  94    * that new value is observed with ReleaseAcquire ordering, prior
  95    * writes (atomic or not) are also observable.  What ReleaseAcquire
  96    * *doesn't* give you is any observable ordering guarantees for
  97    * ReleaseAcquire-ordered operations on different objects.  For
  98    * example, if there are two cores that each perform ReleaseAcquire
  99    * operations on separate objects, each core may or may not observe
 100    * the operations made by the other core.  The only way the cores can
 101    * be synchronized with ReleaseAcquire is if they both
 102    * ReleaseAcquire-access the same object.  This implies that you can't
 103    * necessarily describe some global total ordering of ReleaseAcquire
 104    * operations.
 105    *
 106    * ReleaseAcquire ordering is good for (as the name implies) atomic
 107    * operations on values controlling ownership of things: reference
 108    * counts, mutexes, and the like.  However, if you are thinking about
 109    * using these to implement your own locks or mutexes, you should take
 110    * a good, hard look at actual lock or mutex primitives first.
 111    */
 112   ReleaseAcquire,
 113
 114   /*
 115    * When an atomic value is updated with SequentiallyConsistent
 116    * ordering, all writes observable when the update is observed, just
 117    * as with ReleaseAcquire ordering.  But, furthermore, a global total
 118    * ordering of SequentiallyConsistent operations *can* be described.
 119    * For example, if two cores perform SequentiallyConsistent operations
 120    * on separate objects, one core will observably perform its update
 121    * (and all previous operations will have completed), then the other
 122    * core will observably perform its update (and all previous
 123    * operations will have completed).  (Although those previous
 124    * operations aren't themselves ordered -- they could be intermixed,
 125    * or ordered if they occur on atomic values with ordering
 126    * requirements.)  SequentiallyConsistent is the *simplest and safest*
 127    * ordering of atomic operations -- it's always as if one operation
 128    * happens, then another, then another, in some order -- and every
 129    * core observes updates to happen in that single order.  Because it
 130    * has the most synchronization requirements, operations ordered this
 131    * way also tend to be slowest.
 132    *
 133    * SequentiallyConsistent ordering can be desirable when multiple
 134    * threads observe objects, and they all have to agree on the
 135    * observable order of changes to them.  People expect
 136    * SequentiallyConsistent ordering, even if they shouldn't, when
 137    * writing code, atomic or otherwise.  SequentiallyConsistent is also
 138    * the ordering of choice when designing lockless data structures.  If
 139    * you don't know what order to use, use this one.
 140    */
 141   SequentiallyConsistent,
 142 };
 143
 144 namespace detail {
 145
 146 /*
 147  * We provide CompareExchangeFailureOrder to work around a bug in some
 148  * versions of GCC's <atomic> header.  See bug 898491.
 149  */
 150 template <MemoryOrdering Order>
 151 struct AtomicOrderConstraints;
 152
 153 template <>
 154 struct AtomicOrderConstraints<Relaxed> {
 155   static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
 156   static const std::memory_order LoadOrder = std::memory_order_relaxed;
 157   static const std::memory_order StoreOrder = std::memory_order_relaxed;
 158   static const std::memory_order CompareExchangeFailureOrder =
 159       std::memory_order_relaxed;
 160 };
 161
 162 template <>
 163 struct AtomicOrderConstraints<ReleaseAcquire> {
 164   static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
 165   static const std::memory_order LoadOrder = std::memory_order_acquire;
 166   static const std::memory_order StoreOrder = std::memory_order_release;
 167   static const std::memory_order CompareExchangeFailureOrder =
 168       std::memory_order_acquire;
 169 };
 170
 171 template <>
 172 struct AtomicOrderConstraints<SequentiallyConsistent> {
 173   static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
 174   static const std::memory_order LoadOrder = std::memory_order_seq_cst;
 175   static const std::memory_order StoreOrder = std::memory_order_seq_cst;
 176   static const std::memory_order CompareExchangeFailureOrder =
 177       std::memory_order_seq_cst;
 178 };
 179
 180 template <typename T, MemoryOrdering Order>
 181 struct IntrinsicBase {
 182   typedef std::atomic<T> ValueType;
 183   typedef AtomicOrderConstraints<Order> OrderedOp;
 184 };
 185
 186 template <typename T, MemoryOrdering Order>
 187 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order> {
 188   typedef IntrinsicBase<T, Order> Base;
 189
 190   static T load(const typename Base::ValueType& aPtr) {
 191     return aPtr.load(Base::OrderedOp::LoadOrder);
 192   }
 193
 194   static void store(typename Base::ValueType& aPtr, T aVal) {
 195     aPtr.store(aVal, Base::OrderedOp::StoreOrder);
 196   }
 197
 198   static T exchange(typename Base::ValueType& aPtr, T aVal) {
 199     return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
 200   }
 201
 202   static bool compareExchange(typename Base::ValueType& aPtr, T aOldVal,
 203                               T aNewVal) {
 204     return aPtr.compare_exchange_strong(
 205         aOldVal, aNewVal, Base::OrderedOp::AtomicRMWOrder,
 206         Base::OrderedOp::CompareExchangeFailureOrder);
 207   }
 208 };
 209
 210 template <typename T, MemoryOrdering Order>
 211 struct IntrinsicAddSub : public IntrinsicBase<T, Order> {
 212   typedef IntrinsicBase<T, Order> Base;
 213
 214   static T add(typename Base::ValueType& aPtr, T aVal) {
 215     return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
 216   }
 217
 218   static T sub(typename Base::ValueType& aPtr, T aVal) {
 219     return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
 220   }
 221 };
 222
 223 template <typename T, MemoryOrdering Order>
 224 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order> {
 225   typedef IntrinsicBase<T*, Order> Base;
 226
 227   static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
 228     return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
 229   }
 230
 231   static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
 232     return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
 233   }
 234 };
 235
 236 template <typename T, MemoryOrdering Order>
 237 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order> {
 238   typedef IntrinsicBase<T, Order> Base;
 239
 240   static T inc(typename Base::ValueType& aPtr) {
 241     return IntrinsicAddSub<T, Order>::add(aPtr, 1);
 242   }
 243
 244   static T dec(typename Base::ValueType& aPtr) {
 245     return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
 246   }
 247 };
 248
 249 template <typename T, MemoryOrdering Order>
 250 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 251                           public IntrinsicIncDec<T, Order> {
 252   typedef IntrinsicBase<T, Order> Base;
 253
 254   static T or_(typename Base::ValueType& aPtr, T aVal) {
 255     return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
 256   }
 257
 258   static T xor_(typename Base::ValueType& aPtr, T aVal) {
 259     return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
 260   }
 261
 262   static T and_(typename Base::ValueType& aPtr, T aVal) {
 263     return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
 264   }
 265 };
 266
 267 template <typename T, MemoryOrdering Order>
 268 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
 269                                      public IntrinsicIncDec<T*, Order> {};
 270
 271 template <typename T>
 272 struct ToStorageTypeArgument {
 273   static constexpr T convert(T aT) { return aT; }
 274 };
 275
 276 template <typename T, MemoryOrdering Order>
 277 class AtomicBase {
 278   static_assert(sizeof(T) == 4 || sizeof(T) == 8,
 279                 "mozilla/Atomics.h only supports 32-bit and 64-bit types");
 280
 281  protected:
 282   typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
 283   typedef typename Intrinsics::ValueType ValueType;
 284   ValueType mValue;
 285
 286  public:
 287   constexpr AtomicBase() : mValue() {}
 288   explicit constexpr AtomicBase(T aInit)
 289       : mValue(ToStorageTypeArgument<T>::convert(aInit)) {}
 290
 291   // Note: we can't provide operator T() here because Atomic<bool> inherits
 292   // from AtomcBase with T=uint32_t and not T=bool. If we implemented
 293   // operator T() here, it would cause errors when comparing Atomic<bool> with
 294   // a regular bool.
 295
 296   T operator=(T aVal) {
 297     Intrinsics::store(mValue, aVal);
 298     return aVal;
 299   }
 300
 301   /**
 302    * Performs an atomic swap operation.  aVal is stored and the previous
 303    * value of this variable is returned.
 304    */
 305   T exchange(T aVal) { return Intrinsics::exchange(mValue, aVal); }
 306
 307   /**
 308    * Performs an atomic compare-and-swap operation and returns true if it
 309    * succeeded. This is equivalent to atomically doing
 310    *
 311    *   if (mValue == aOldValue) {
 312    *     mValue = aNewValue;
 313    *     return true;
 314    *   } else {
 315    *     return false;
 316    *   }
 317    */
 318   bool compareExchange(T aOldValue, T aNewValue) {
 319     return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
 320   }
 321
 322  private:
 323   AtomicBase(const AtomicBase& aCopy) = delete;
 324 };
 325
 326 template <typename T, MemoryOrdering Order>
 327 class AtomicBaseIncDec : public AtomicBase<T, Order> {
 328   typedef typename detail::AtomicBase<T, Order> Base;
 329
 330  public:
 331   constexpr AtomicBaseIncDec() : Base() {}
 332   explicit constexpr AtomicBaseIncDec(T aInit) : Base(aInit) {}
 333
 334   using Base::operator=;
 335
 336   operator T() const { return Base::Intrinsics::load(Base::mValue); }
 337   T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
 338   T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
 339   T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
 340   T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
 341
 342  private:
 343   AtomicBaseIncDec(const AtomicBaseIncDec& aCopy) = delete;
 344 };
 345
 346 }  // namespace detail
 347
 348 /**
 349  * A wrapper for a type that enforces that all memory accesses are atomic.
 350  *
 351  * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 352  * its place.  Implementations for integral and pointer types are provided
 353  * below.
 354  *
 355  * Atomic accesses are sequentially consistent by default.  You should
 356  * use the default unless you are tall enough to ride the
 357  * memory-ordering roller coaster (if you're not sure, you aren't) and
 358  * you have a compelling reason to do otherwise.
 359  *
 360  * There is one exception to the case of atomic memory accesses: providing an
 361  * initial value of the atomic value is not guaranteed to be atomic.  This is a
 362  * deliberate design choice that enables static atomic variables to be declared
 363  * without introducing extra static constructors.
 364  */
 365 template <typename T, MemoryOrdering Order = SequentiallyConsistent,
 366           typename Enable = void>
 367 class Atomic;
 368
 369 /**
 370  * Atomic<T> implementation for integral types.
 371  *
 372  * In addition to atomic store and load operations, compound assignment and
 373  * increment/decrement operators are implemented which perform the
 374  * corresponding read-modify-write operation atomically.  Finally, an atomic
 375  * swap method is provided.
 376  */
 377 template <typename T, MemoryOrdering Order>
 378 class Atomic<
 379     T, Order,
 380     std::enable_if_t<std::is_integral_v<T> && !std::is_same_v<T, bool>>>
 381     : public detail::AtomicBaseIncDec<T, Order> {
 382   typedef typename detail::AtomicBaseIncDec<T, Order> Base;
 383
 384  public:
 385   constexpr Atomic() : Base() {}
 386   explicit constexpr Atomic(T aInit) : Base(aInit) {}
 387
 388   using Base::operator=;
 389
 390   T operator+=(T aDelta) {
 391     return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
 392   }
 393
 394   T operator-=(T aDelta) {
 395     return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
 396   }
 397
 398   T operator|=(T aVal) {
 399     return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
 400   }
 401
 402   T operator^=(T aVal) {
 403     return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
 404   }
 405
 406   T operator&=(T aVal) {
 407     return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
 408   }
 409
 410  private:
 411   Atomic(Atomic& aOther) = delete;
 412 };
 413
 414 /**
 415  * Atomic<T> implementation for pointer types.
 416  *
 417  * An atomic compare-and-swap primitive for pointer variables is provided, as
 418  * are atomic increment and decement operators.  Also provided are the compound
 419  * assignment operators for addition and subtraction. Atomic swap (via
 420  * exchange()) is included as well.
 421  */
 422 template <typename T, MemoryOrdering Order>
 423 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order> {
 424   typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
 425
 426  public:
 427   constexpr Atomic() : Base() {}
 428   explicit constexpr Atomic(T* aInit) : Base(aInit) {}
 429
 430   using Base::operator=;
 431
 432   T* operator+=(ptrdiff_t aDelta) {
 433     return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
 434   }
 435
 436   T* operator-=(ptrdiff_t aDelta) {
 437     return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
 438   }
 439
 440  private:
 441   Atomic(Atomic& aOther) = delete;
 442 };
 443
 444 /**
 445  * Atomic<T> implementation for enum types.
 446  *
 447  * The atomic store and load operations and the atomic swap method is provided.
 448  */
 449 template <typename T, MemoryOrdering Order>
 450 class Atomic<T, Order, std::enable_if_t<std::is_enum_v<T>>>
 451     : public detail::AtomicBase<T, Order> {
 452   typedef typename detail::AtomicBase<T, Order> Base;
 453
 454  public:
 455   constexpr Atomic() : Base() {}
 456   explicit constexpr Atomic(T aInit) : Base(aInit) {}
 457
 458   operator T() const { return T(Base::Intrinsics::load(Base::mValue)); }
 459
 460   using Base::operator=;
 461
 462  private:
 463   Atomic(Atomic& aOther) = delete;
 464 };
 465
 466 /**
 467  * Atomic<T> implementation for boolean types.
 468  *
 469  * The atomic store and load operations and the atomic swap method is provided.
 470  *
 471  * Note:
 472  *
 473  * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
 474  *   bool and/or some implementations of std::atomic. This is allowed in
 475  *   [atomic.types.generic]p9.
 476  *
 477  * - It's not obvious whether the 8-bit atomic functions on Windows are always
 478  *   inlined or not. If they are not inlined, the corresponding functions in the
 479  *   runtime library are not available on Windows XP. This is why we implement
 480  *   Atomic<bool> with an underlying type of uint32_t.
 481  */
 482 template <MemoryOrdering Order>
 483 class Atomic<bool, Order> : protected detail::AtomicBase<uint32_t, Order> {
 484   typedef typename detail::AtomicBase<uint32_t, Order> Base;
 485
 486  public:
 487   constexpr Atomic() : Base() {}
 488   explicit constexpr Atomic(bool aInit) : Base(aInit) {}
 489
 490   // We provide boolean wrappers for the underlying AtomicBase methods.
 491   MOZ_IMPLICIT operator bool() const {
 492     return Base::Intrinsics::load(Base::mValue);
 493   }
 494
 495   bool operator=(bool aVal) { return Base::operator=(aVal); }
 496
 497   bool exchange(bool aVal) { return Base::exchange(aVal); }
 498
 499   bool compareExchange(bool aOldValue, bool aNewValue) {
 500     return Base::compareExchange(aOldValue, aNewValue);
 501   }
 502
 503  private:
 504   Atomic(Atomic& aOther) = delete;
 505 };
 506
 507 }  // namespace mozilla
 508
 509 namespace std {
 510
 511 // If you want to atomically swap two atomic values, use exchange().
 512 template <typename T, mozilla::MemoryOrdering Order>
 513 void swap(mozilla::Atomic<T, Order>&, mozilla::Atomic<T, Order>&) = delete;
 514
 515 }  // namespace std
 516
 517 #endif /* mozilla_Atomics_h */