mfbt/Atomics.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * Implements (almost always) lock-free atomic operations. The operations here
   9  * are a subset of that which can be found in C++11's <atomic> header, with a
  10  * different API to enforce consistent memory ordering constraints.
  11  *
  12  * Anyone caught using |volatile| for inter-thread memory safety needs to be
  13  * sent a copy of this header and the C++11 standard.
  14  */
  15
  16 #ifndef mozilla_Atomics_h
  17 #define mozilla_Atomics_h
  18
  19 #include "mozilla/Assertions.h"
  20 #include "mozilla/Attributes.h"
  21 #include "mozilla/Compiler.h"
  22
  23 #ifdef __wasi__
  24 #  include "mozilla/WasiAtomic.h"
  25 #else
  26 #  include <atomic>
  27 #endif  // __wasi__
  28
  29 #include <stdint.h>
  30 #include <type_traits>
  31
  32 namespace mozilla {
  33
  34 /**
  35  * An enum of memory ordering possibilities for atomics.
  36  *
  37  * Memory ordering is the observable state of distinct values in memory.
  38  * (It's a separate concept from atomicity, which concerns whether an
  39  * operation can ever be observed in an intermediate state.  Don't
  40  * conflate the two!)  Given a sequence of operations in source code on
  41  * memory, it is *not* always the case that, at all times and on all
  42  * cores, those operations will appear to have occurred in that exact
  43  * sequence.  First, the compiler might reorder that sequence, if it
  44  * thinks another ordering will be more efficient.  Second, the CPU may
  45  * not expose so consistent a view of memory.  CPUs will often perform
  46  * their own instruction reordering, above and beyond that performed by
  47  * the compiler.  And each core has its own memory caches, and accesses
  48  * (reads and writes both) to "memory" may only resolve to out-of-date
  49  * cache entries -- not to the "most recently" performed operation in
  50  * some global sense.  Any access to a value that may be used by
  51  * multiple threads, potentially across multiple cores, must therefore
  52  * have a memory ordering imposed on it, for all code on all
  53  * threads/cores to have a sufficiently coherent worldview.
  54  *
  55  * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
  56  * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
  57  * detail on all this, including examples of how each mode works.
  58  *
  59  * Note that for simplicity and practicality, not all of the modes in
  60  * C++11 are supported.  The missing C++11 modes are either subsumed by
  61  * the modes we provide below, or not relevant for the CPUs we support
  62  * in Gecko.  These three modes are confusing enough as it is!
  63  */
  64 enum MemoryOrdering {
  65   /*
  66    * Relaxed ordering is the simplest memory ordering: none at all.
  67    * When the result of a write is observed, nothing may be inferred
  68    * about other memory.  Writes ostensibly performed "before" on the
  69    * writing thread may not yet be visible.  Writes performed "after" on
  70    * the writing thread may already be visible, if the compiler or CPU
  71    * reordered them.  (The latter can happen if reads and/or writes get
  72    * held up in per-processor caches.)  Relaxed ordering means
  73    * operations can always use cached values (as long as the actual
  74    * updates to atomic values actually occur, correctly, eventually), so
  75    * it's usually the fastest sort of atomic access.  For this reason,
  76    * *it's also the most dangerous kind of access*.
  77    *
  78    * Relaxed ordering is good for things like process-wide statistics
  79    * counters that don't need to be consistent with anything else, so
  80    * long as updates themselves are atomic.  (And so long as any
  81    * observations of that value can tolerate being out-of-date -- if you
  82    * need some sort of up-to-date value, you need some sort of other
  83    * synchronizing operation.)  It's *not* good for locks, mutexes,
  84    * reference counts, etc. that mediate access to other memory, or must
  85    * be observably consistent with other memory.
  86    *
  87    * x86 architectures don't take advantage of the optimization
  88    * opportunities that relaxed ordering permits.  Thus it's possible
  89    * that using relaxed ordering will "work" on x86 but fail elsewhere
  90    * (ARM, say, which *does* implement non-sequentially-consistent
  91    * relaxed ordering semantics).  Be extra-careful using relaxed
  92    * ordering if you can't easily test non-x86 architectures!
  93    */
  94   Relaxed,
  95
  96   /*
  97    * When an atomic value is updated with ReleaseAcquire ordering, and
  98    * that new value is observed with ReleaseAcquire ordering, prior
  99    * writes (atomic or not) are also observable.  What ReleaseAcquire
 100    * *doesn't* give you is any observable ordering guarantees for
 101    * ReleaseAcquire-ordered operations on different objects.  For
 102    * example, if there are two cores that each perform ReleaseAcquire
 103    * operations on separate objects, each core may or may not observe
 104    * the operations made by the other core.  The only way the cores can
 105    * be synchronized with ReleaseAcquire is if they both
 106    * ReleaseAcquire-access the same object.  This implies that you can't
 107    * necessarily describe some global total ordering of ReleaseAcquire
 108    * operations.
 109    *
 110    * ReleaseAcquire ordering is good for (as the name implies) atomic
 111    * operations on values controlling ownership of things: reference
 112    * counts, mutexes, and the like.  However, if you are thinking about
 113    * using these to implement your own locks or mutexes, you should take
 114    * a good, hard look at actual lock or mutex primitives first.
 115    */
 116   ReleaseAcquire,
 117
 118   /*
 119    * When an atomic value is updated with SequentiallyConsistent
 120    * ordering, all writes observable when the update is observed, just
 121    * as with ReleaseAcquire ordering.  But, furthermore, a global total
 122    * ordering of SequentiallyConsistent operations *can* be described.
 123    * For example, if two cores perform SequentiallyConsistent operations
 124    * on separate objects, one core will observably perform its update
 125    * (and all previous operations will have completed), then the other
 126    * core will observably perform its update (and all previous
 127    * operations will have completed).  (Although those previous
 128    * operations aren't themselves ordered -- they could be intermixed,
 129    * or ordered if they occur on atomic values with ordering
 130    * requirements.)  SequentiallyConsistent is the *simplest and safest*
 131    * ordering of atomic operations -- it's always as if one operation
 132    * happens, then another, then another, in some order -- and every
 133    * core observes updates to happen in that single order.  Because it
 134    * has the most synchronization requirements, operations ordered this
 135    * way also tend to be slowest.
 136    *
 137    * SequentiallyConsistent ordering can be desirable when multiple
 138    * threads observe objects, and they all have to agree on the
 139    * observable order of changes to them.  People expect
 140    * SequentiallyConsistent ordering, even if they shouldn't, when
 141    * writing code, atomic or otherwise.  SequentiallyConsistent is also
 142    * the ordering of choice when designing lockless data structures.  If
 143    * you don't know what order to use, use this one.
 144    */
 145   SequentiallyConsistent,
 146 };
 147
 148 namespace detail {
 149
 150 /*
 151  * We provide CompareExchangeFailureOrder to work around a bug in some
 152  * versions of GCC's <atomic> header.  See bug 898491.
 153  */
 154 template <MemoryOrdering Order>
 155 struct AtomicOrderConstraints;
 156
 157 template <>
 158 struct AtomicOrderConstraints<Relaxed> {
 159   static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
 160   static const std::memory_order LoadOrder = std::memory_order_relaxed;
 161   static const std::memory_order StoreOrder = std::memory_order_relaxed;
 162   static const std::memory_order CompareExchangeFailureOrder =
 163       std::memory_order_relaxed;
 164 };
 165
 166 template <>
 167 struct AtomicOrderConstraints<ReleaseAcquire> {
 168   static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
 169   static const std::memory_order LoadOrder = std::memory_order_acquire;
 170   static const std::memory_order StoreOrder = std::memory_order_release;
 171   static const std::memory_order CompareExchangeFailureOrder =
 172       std::memory_order_acquire;
 173 };
 174
 175 template <>
 176 struct AtomicOrderConstraints<SequentiallyConsistent> {
 177   static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
 178   static const std::memory_order LoadOrder = std::memory_order_seq_cst;
 179   static const std::memory_order StoreOrder = std::memory_order_seq_cst;
 180   static const std::memory_order CompareExchangeFailureOrder =
 181       std::memory_order_seq_cst;
 182 };
 183
 184 template <typename T, MemoryOrdering Order>
 185 struct IntrinsicBase {
 186   typedef std::atomic<T> ValueType;
 187   typedef AtomicOrderConstraints<Order> OrderedOp;
 188 };
 189
 190 template <typename T, MemoryOrdering Order>
 191 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order> {
 192   typedef IntrinsicBase<T, Order> Base;
 193
 194   static T load(const typename Base::ValueType& aPtr) {
 195     return aPtr.load(Base::OrderedOp::LoadOrder);
 196   }
 197
 198   static void store(typename Base::ValueType& aPtr, T aVal) {
 199     aPtr.store(aVal, Base::OrderedOp::StoreOrder);
 200   }
 201
 202   static T exchange(typename Base::ValueType& aPtr, T aVal) {
 203     return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
 204   }
 205
 206   static bool compareExchange(typename Base::ValueType& aPtr, T aOldVal,
 207                               T aNewVal) {
 208     return aPtr.compare_exchange_strong(
 209         aOldVal, aNewVal, Base::OrderedOp::AtomicRMWOrder,
 210         Base::OrderedOp::CompareExchangeFailureOrder);
 211   }
 212 };
 213
 214 template <typename T, MemoryOrdering Order>
 215 struct IntrinsicAddSub : public IntrinsicBase<T, Order> {
 216   typedef IntrinsicBase<T, Order> Base;
 217
 218   static T add(typename Base::ValueType& aPtr, T aVal) {
 219     return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
 220   }
 221
 222   static T sub(typename Base::ValueType& aPtr, T aVal) {
 223     return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
 224   }
 225 };
 226
 227 template <typename T, MemoryOrdering Order>
 228 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order> {
 229   typedef IntrinsicBase<T*, Order> Base;
 230
 231   static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
 232     return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
 233   }
 234
 235   static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
 236     return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
 237   }
 238 };
 239
 240 template <typename T, MemoryOrdering Order>
 241 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order> {
 242   typedef IntrinsicBase<T, Order> Base;
 243
 244   static T inc(typename Base::ValueType& aPtr) {
 245     return IntrinsicAddSub<T, Order>::add(aPtr, 1);
 246   }
 247
 248   static T dec(typename Base::ValueType& aPtr) {
 249     return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
 250   }
 251 };
 252
 253 template <typename T, MemoryOrdering Order>
 254 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 255                           public IntrinsicIncDec<T, Order> {
 256   typedef IntrinsicBase<T, Order> Base;
 257
 258   static T or_(typename Base::ValueType& aPtr, T aVal) {
 259     return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
 260   }
 261
 262   static T xor_(typename Base::ValueType& aPtr, T aVal) {
 263     return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
 264   }
 265
 266   static T and_(typename Base::ValueType& aPtr, T aVal) {
 267     return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
 268   }
 269 };
 270
 271 template <typename T, MemoryOrdering Order>
 272 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
 273                                      public IntrinsicIncDec<T*, Order> {};
 274
 275 template <typename T>
 276 struct ToStorageTypeArgument {
 277   static constexpr T convert(T aT) { return aT; }
 278 };
 279
 280 template <typename T, MemoryOrdering Order>
 281 class AtomicBase {
 282   static_assert(sizeof(T) == 4 || sizeof(T) == 8,
 283                 "mozilla/Atomics.h only supports 32-bit and 64-bit types");
 284
 285  protected:
 286   typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
 287   typedef typename Intrinsics::ValueType ValueType;
 288   ValueType mValue;
 289
 290  public:
 291   constexpr AtomicBase() : mValue() {}
 292   explicit constexpr AtomicBase(T aInit)
 293       : mValue(ToStorageTypeArgument<T>::convert(aInit)) {}
 294
 295   // Note: we can't provide operator T() here because Atomic<bool> inherits
 296   // from AtomcBase with T=uint32_t and not T=bool. If we implemented
 297   // operator T() here, it would cause errors when comparing Atomic<bool> with
 298   // a regular bool.
 299
 300   T operator=(T aVal) {
 301     Intrinsics::store(mValue, aVal);
 302     return aVal;
 303   }
 304
 305   /**
 306    * Performs an atomic swap operation.  aVal is stored and the previous
 307    * value of this variable is returned.
 308    */
 309   T exchange(T aVal) { return Intrinsics::exchange(mValue, aVal); }
 310
 311   /**
 312    * Performs an atomic compare-and-swap operation and returns true if it
 313    * succeeded. This is equivalent to atomically doing
 314    *
 315    *   if (mValue == aOldValue) {
 316    *     mValue = aNewValue;
 317    *     return true;
 318    *   } else {
 319    *     return false;
 320    *   }
 321    */
 322   bool compareExchange(T aOldValue, T aNewValue) {
 323     return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
 324   }
 325
 326  private:
 327   AtomicBase(const AtomicBase& aCopy) = delete;
 328 };
 329
 330 template <typename T, MemoryOrdering Order>
 331 class AtomicBaseIncDec : public AtomicBase<T, Order> {
 332   typedef typename detail::AtomicBase<T, Order> Base;
 333
 334  public:
 335   constexpr AtomicBaseIncDec() : Base() {}
 336   explicit constexpr AtomicBaseIncDec(T aInit) : Base(aInit) {}
 337
 338   using Base::operator=;
 339
 340   operator T() const { return Base::Intrinsics::load(Base::mValue); }
 341   T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
 342   T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
 343   T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
 344   T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
 345
 346  private:
 347   AtomicBaseIncDec(const AtomicBaseIncDec& aCopy) = delete;
 348 };
 349
 350 }  // namespace detail
 351
 352 /**
 353  * A wrapper for a type that enforces that all memory accesses are atomic.
 354  *
 355  * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 356  * its place.  Implementations for integral and pointer types are provided
 357  * below.
 358  *
 359  * Atomic accesses are sequentially consistent by default.  You should
 360  * use the default unless you are tall enough to ride the
 361  * memory-ordering roller coaster (if you're not sure, you aren't) and
 362  * you have a compelling reason to do otherwise.
 363  *
 364  * There is one exception to the case of atomic memory accesses: providing an
 365  * initial value of the atomic value is not guaranteed to be atomic.  This is a
 366  * deliberate design choice that enables static atomic variables to be declared
 367  * without introducing extra static constructors.
 368  */
 369 template <typename T, MemoryOrdering Order = SequentiallyConsistent,
 370           typename Enable = void>
 371 class Atomic;
 372
 373 /**
 374  * Atomic<T> implementation for integral types.
 375  *
 376  * In addition to atomic store and load operations, compound assignment and
 377  * increment/decrement operators are implemented which perform the
 378  * corresponding read-modify-write operation atomically.  Finally, an atomic
 379  * swap method is provided.
 380  */
 381 template <typename T, MemoryOrdering Order>
 382 class Atomic<
 383     T, Order,
 384     std::enable_if_t<std::is_integral_v<T> && !std::is_same_v<T, bool>>>
 385     : public detail::AtomicBaseIncDec<T, Order> {
 386   typedef typename detail::AtomicBaseIncDec<T, Order> Base;
 387
 388  public:
 389   constexpr Atomic() : Base() {}
 390   explicit constexpr Atomic(T aInit) : Base(aInit) {}
 391
 392   using Base::operator=;
 393
 394   T operator+=(T aDelta) {
 395     return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
 396   }
 397
 398   T operator-=(T aDelta) {
 399     return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
 400   }
 401
 402   T operator|=(T aVal) {
 403     return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
 404   }
 405
 406   T operator^=(T aVal) {
 407     return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
 408   }
 409
 410   T operator&=(T aVal) {
 411     return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
 412   }
 413
 414  private:
 415   Atomic(Atomic& aOther) = delete;
 416 };
 417
 418 /**
 419  * Atomic<T> implementation for pointer types.
 420  *
 421  * An atomic compare-and-swap primitive for pointer variables is provided, as
 422  * are atomic increment and decement operators.  Also provided are the compound
 423  * assignment operators for addition and subtraction. Atomic swap (via
 424  * exchange()) is included as well.
 425  */
 426 template <typename T, MemoryOrdering Order>
 427 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order> {
 428   typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
 429
 430  public:
 431   constexpr Atomic() : Base() {}
 432   explicit constexpr Atomic(T* aInit) : Base(aInit) {}
 433
 434   using Base::operator=;
 435
 436   T* operator+=(ptrdiff_t aDelta) {
 437     return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
 438   }
 439
 440   T* operator-=(ptrdiff_t aDelta) {
 441     return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
 442   }
 443
 444  private:
 445   Atomic(Atomic& aOther) = delete;
 446 };
 447
 448 /**
 449  * Atomic<T> implementation for enum types.
 450  *
 451  * The atomic store and load operations and the atomic swap method is provided.
 452  */
 453 template <typename T, MemoryOrdering Order>
 454 class Atomic<T, Order, std::enable_if_t<std::is_enum_v<T>>>
 455     : public detail::AtomicBase<T, Order> {
 456   typedef typename detail::AtomicBase<T, Order> Base;
 457
 458  public:
 459   constexpr Atomic() : Base() {}
 460   explicit constexpr Atomic(T aInit) : Base(aInit) {}
 461
 462   operator T() const { return T(Base::Intrinsics::load(Base::mValue)); }
 463
 464   using Base::operator=;
 465
 466  private:
 467   Atomic(Atomic& aOther) = delete;
 468 };
 469
 470 /**
 471  * Atomic<T> implementation for boolean types.
 472  *
 473  * The atomic store and load operations and the atomic swap method is provided.
 474  *
 475  * Note:
 476  *
 477  * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
 478  *   bool and/or some implementations of std::atomic. This is allowed in
 479  *   [atomic.types.generic]p9.
 480  *
 481  * - It's not obvious whether the 8-bit atomic functions on Windows are always
 482  *   inlined or not. If they are not inlined, the corresponding functions in the
 483  *   runtime library are not available on Windows XP. This is why we implement
 484  *   Atomic<bool> with an underlying type of uint32_t.
 485  */
 486 template <MemoryOrdering Order>
 487 class Atomic<bool, Order> : protected detail::AtomicBase<uint32_t, Order> {
 488   typedef typename detail::AtomicBase<uint32_t, Order> Base;
 489
 490  public:
 491   constexpr Atomic() : Base() {}
 492   explicit constexpr Atomic(bool aInit) : Base(aInit) {}
 493
 494   // We provide boolean wrappers for the underlying AtomicBase methods.
 495   MOZ_IMPLICIT operator bool() const {
 496     return Base::Intrinsics::load(Base::mValue);
 497   }
 498
 499   bool operator=(bool aVal) { return Base::operator=(aVal); }
 500
 501   bool exchange(bool aVal) { return Base::exchange(aVal); }
 502
 503   bool compareExchange(bool aOldValue, bool aNewValue) {
 504     return Base::compareExchange(aOldValue, aNewValue);
 505   }
 506
 507  private:
 508   Atomic(Atomic& aOther) = delete;
 509 };
 510
 511 }  // namespace mozilla
 512
 513 namespace std {
 514
 515 // If you want to atomically swap two atomic values, use exchange().
 516 template <typename T, mozilla::MemoryOrdering Order>
 517 void swap(mozilla::Atomic<T, Order>&, mozilla::Atomic<T, Order>&) = delete;
 518
 519 }  // namespace std
 520
 521 #endif /* mozilla_Atomics_h */