/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * Implements (almost always) lock-free atomic operations. The operations here
 * are a subset of those found in C++11's <atomic> header, with a
 * different API to enforce consistent memory ordering constraints.
 *
 * Anyone caught using |volatile| for inter-thread memory safety needs to be
 * sent a copy of this header and the C++11 standard.
 */
#ifndef mozilla_Atomics_h
#define mozilla_Atomics_h
#include "mozilla/Attributes.h"

#if defined(__wasi__)
#  include "mozilla/WasiAtomic.h"
#else
#  include <atomic>
#endif  // defined(__wasi__)

#include <stddef.h>  // For ptrdiff_t
#include <stdint.h>  // For uint32_t

#include <type_traits>

namespace mozilla {
/*
 * An enum of memory ordering possibilities for atomics.
 *
 * Memory ordering is the observable state of distinct values in memory.
 * (It's a separate concept from atomicity, which concerns whether an
 * operation can ever be observed in an intermediate state. Don't
 * conflate the two!) Given a sequence of operations in source code on
 * memory, it is *not* always the case that, at all times and on all
 * cores, those operations will appear to have occurred in that exact
 * sequence. First, the compiler might reorder that sequence, if it
 * thinks another ordering will be more efficient. Second, the CPU may
 * not expose so consistent a view of memory. CPUs will often perform
 * their own instruction reordering, above and beyond that performed by
 * the compiler. And each core has its own memory caches, and accesses
 * (reads and writes both) to "memory" may only resolve to out-of-date
 * cache entries -- not to the "most recently" performed operation in
 * some global sense. Any access to a value that may be used by
 * multiple threads, potentially across multiple cores, must therefore
 * have a memory ordering imposed on it, for all code on all
 * threads/cores to have a sufficiently coherent worldview.
 *
 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
 * detail on all this, including examples of how each mode works.
 *
 * Note that for simplicity and practicality, not all of the modes in
 * C++11 are supported. The missing C++11 modes are either subsumed by
 * the modes we provide below, or not relevant for the CPUs we support
 * in Gecko. These three modes are confusing enough as it is!
 */
enum MemoryOrdering {
  /*
   * Relaxed ordering is the simplest memory ordering: none at all.
   * When the result of a write is observed, nothing may be inferred
   * about other memory. Writes ostensibly performed "before" on the
   * writing thread may not yet be visible. Writes performed "after" on
   * the writing thread may already be visible, if the compiler or CPU
   * reordered them. (The latter can happen if reads and/or writes get
   * held up in per-processor caches.) Relaxed ordering means
   * operations can always use cached values (as long as the actual
   * updates to atomic values actually occur, correctly, eventually), so
   * it's usually the fastest sort of atomic access. For this reason,
   * *it's also the most dangerous kind of access*.
   *
   * Relaxed ordering is good for things like process-wide statistics
   * counters that don't need to be consistent with anything else, so
   * long as updates themselves are atomic. (And so long as any
   * observations of that value can tolerate being out-of-date -- if you
   * need some sort of up-to-date value, you need some sort of other
   * synchronizing operation.) It's *not* good for locks, mutexes,
   * reference counts, etc. that mediate access to other memory, or must
   * be observably consistent with other memory.
   *
   * x86 architectures don't take advantage of the optimization
   * opportunities that relaxed ordering permits. Thus it's possible
   * that using relaxed ordering will "work" on x86 but fail elsewhere
   * (ARM, say, which *does* implement non-sequentially-consistent
   * relaxed ordering semantics). Be extra-careful using relaxed
   * ordering if you can't easily test non-x86 architectures!
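   *
   * A minimal sketch of such a statistics counter (the names here are
   * hypothetical, not part of this header):
   *
   *   static Atomic<uint32_t, Relaxed> sEventCount;
   *   void CountEvent() { ++sEventCount; }  // atomic, but unordered
   *   // Readers may observe a stale count; that is acceptable here.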
   */
  Relaxed,

  /*
   * When an atomic value is updated with ReleaseAcquire ordering, and
   * that new value is observed with ReleaseAcquire ordering, prior
   * writes (atomic or not) are also observable. What ReleaseAcquire
   * *doesn't* give you is any observable ordering guarantees for
   * ReleaseAcquire-ordered operations on different objects. For
   * example, if there are two cores that each perform ReleaseAcquire
   * operations on separate objects, each core may or may not observe
   * the operations made by the other core. The only way the cores can
   * be synchronized with ReleaseAcquire is if they both
   * ReleaseAcquire-access the same object. This implies that you can't
   * necessarily describe some global total ordering of ReleaseAcquire
   * operations.
   *
   * ReleaseAcquire ordering is good for (as the name implies) atomic
   * operations on values controlling ownership of things: reference
   * counts, mutexes, and the like. However, if you are thinking about
   * using these to implement your own locks or mutexes, you should take
   * a good, hard look at actual lock or mutex primitives first.
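   *
   * A minimal publication sketch (hypothetical names): one thread
   * fills in some plain data, then sets a ReleaseAcquire flag; a
   * consumer that observes the flag set may safely read the data:
   *
   *   static int sData;  // plain, non-atomic memory
   *   static Atomic<bool, ReleaseAcquire> sReady;
   *   // Producer:  sData = 42;  sReady = true;
   *   // Consumer:  if (sReady) { use(sData); }  // sees sData == 42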
   */
  ReleaseAcquire,

  /*
   * When an atomic value is updated with SequentiallyConsistent
   * ordering, all writes are observable when the update is observed,
   * just as with ReleaseAcquire ordering. But, furthermore, a global
   * total ordering of SequentiallyConsistent operations *can* be
   * described. For example, if two cores perform SequentiallyConsistent
   * operations on separate objects, one core will observably perform
   * its update (and all previous operations will have completed), then
   * the other core will observably perform its update (and all previous
   * operations will have completed). (Although those previous
   * operations aren't themselves ordered -- they could be intermixed,
   * or ordered if they occur on atomic values with ordering
   * requirements.) SequentiallyConsistent is the *simplest and safest*
   * ordering of atomic operations -- it's always as if one operation
   * happens, then another, then another, in some order -- and every
   * core observes updates to happen in that single order. Because it
   * has the most synchronization requirements, operations ordered this
   * way also tend to be slowest.
   *
   * SequentiallyConsistent ordering can be desirable when multiple
   * threads observe objects, and they all have to agree on the
   * observable order of changes to them. People expect
   * SequentiallyConsistent ordering, even if they shouldn't, when
   * writing code, atomic or otherwise. SequentiallyConsistent is also
   * the ordering of choice when designing lockless data structures. If
   * you don't know what order to use, use this one.
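   *
   * A classic sketch that needs this total order (hypothetical
   * names): two flags, each set by one thread and read by the other.
   * Sequential consistency guarantees that at least one thread
   * observes the other's write:
   *
   *   static Atomic<bool> sX, sY;  // SequentiallyConsistent default
   *   // Thread 1:  sX = true;  bool r1 = sY;
   *   // Thread 2:  sY = true;  bool r2 = sX;
   *   // r1 and r2 cannot both be false; weaker orderings permit it.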
   */
  SequentiallyConsistent,
};

namespace detail {
/*
 * We provide CompareExchangeFailureOrder to work around a bug in some
 * versions of GCC's <atomic> header. See bug 898491.
 */
template <MemoryOrdering Order>
struct AtomicOrderConstraints;
template <>
struct AtomicOrderConstraints<Relaxed> {
  static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
  static const std::memory_order LoadOrder = std::memory_order_relaxed;
  static const std::memory_order StoreOrder = std::memory_order_relaxed;
  static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_relaxed;
};
template <>
struct AtomicOrderConstraints<ReleaseAcquire> {
  static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
  static const std::memory_order LoadOrder = std::memory_order_acquire;
  static const std::memory_order StoreOrder = std::memory_order_release;
  static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_acquire;
};
template <>
struct AtomicOrderConstraints<SequentiallyConsistent> {
  static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
  static const std::memory_order LoadOrder = std::memory_order_seq_cst;
  static const std::memory_order StoreOrder = std::memory_order_seq_cst;
  static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_seq_cst;
};
template <typename T, MemoryOrdering Order>
struct IntrinsicBase {
  typedef std::atomic<T> ValueType;
  typedef AtomicOrderConstraints<Order> OrderedOp;
};
template <typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps : public IntrinsicBase<T, Order> {
  typedef IntrinsicBase<T, Order> Base;

  static T load(const typename Base::ValueType& aPtr) {
    return aPtr.load(Base::OrderedOp::LoadOrder);
  }

  static void store(typename Base::ValueType& aPtr, T aVal) {
    aPtr.store(aVal, Base::OrderedOp::StoreOrder);
  }

  static T exchange(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static bool compareExchange(typename Base::ValueType& aPtr, T aOldVal,
                              T aNewVal) {
    return aPtr.compare_exchange_strong(
        aOldVal, aNewVal, Base::OrderedOp::AtomicRMWOrder,
        Base::OrderedOp::CompareExchangeFailureOrder);
  }
};
template <typename T, MemoryOrdering Order>
struct IntrinsicAddSub : public IntrinsicBase<T, Order> {
  typedef IntrinsicBase<T, Order> Base;

  static T add(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T sub(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
  }
};
template <typename T, MemoryOrdering Order>
struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order> {
  typedef IntrinsicBase<T*, Order> Base;

  static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
    return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
    return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
  }
};
template <typename T, MemoryOrdering Order>
struct IntrinsicIncDec : public IntrinsicAddSub<T, Order> {
  typedef IntrinsicBase<T, Order> Base;

  static T inc(typename Base::ValueType& aPtr) {
    return IntrinsicAddSub<T, Order>::add(aPtr, 1);
  }

  static T dec(typename Base::ValueType& aPtr) {
    return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
  }
};
template <typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T, Order> {
  typedef IntrinsicBase<T, Order> Base;

  static T or_(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T xor_(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T and_(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
  }
};
template <typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*, Order> {};
template <typename T>
struct ToStorageTypeArgument {
  static constexpr T convert(T aT) { return aT; }
};
template <typename T, MemoryOrdering Order>
class AtomicBase {
  static_assert(sizeof(T) == 4 || sizeof(T) == 8,
                "mozilla/Atomics.h only supports 32-bit and 64-bit types");

 protected:
  typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
  typedef typename Intrinsics::ValueType ValueType;
  ValueType mValue;

 public:
  constexpr AtomicBase() : mValue() {}
  explicit constexpr AtomicBase(T aInit)
      : mValue(ToStorageTypeArgument<T>::convert(aInit)) {}

  // Note: we can't provide operator T() here because Atomic<bool> inherits
  // from AtomicBase with T=uint32_t and not T=bool. If we implemented
  // operator T() here, it would cause errors when comparing Atomic<bool> with
  // a regular bool.

  T operator=(T aVal) {
    Intrinsics::store(mValue, aVal);
    return aVal;
  }

  /**
   * Performs an atomic swap operation. aVal is stored and the previous
   * value of this variable is returned.
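   *
   * A usage sketch (hypothetical names):
   *
   *   Atomic<uint32_t> sState(0);
   *   uint32_t previous = sState.exchange(1);  // store 1, return old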
   */
  T exchange(T aVal) { return Intrinsics::exchange(mValue, aVal); }

  /**
   * Performs an atomic compare-and-swap operation and returns true if it
   * succeeded. This is equivalent to atomically doing
   *
   *   if (mValue == aOldValue) {
   *     mValue = aNewValue;
   *     return true;
   *   } else {
   *     return false;
   *   }
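   *
   * A typical retry-loop sketch (hypothetical names):
   *
   *   Atomic<uint32_t> sValue(0);
   *   uint32_t old;
   *   do {
   *     old = sValue;
   *   } while (!sValue.compareExchange(old, old + 1));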
   */
  bool compareExchange(T aOldValue, T aNewValue) {
    return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
  }

 private:
  AtomicBase(const AtomicBase& aCopy) = delete;
};
template <typename T, MemoryOrdering Order>
class AtomicBaseIncDec : public AtomicBase<T, Order> {
  typedef typename detail::AtomicBase<T, Order> Base;

 public:
  constexpr AtomicBaseIncDec() : Base() {}
  explicit constexpr AtomicBaseIncDec(T aInit) : Base(aInit) {}

  using Base::operator=;

  operator T() const { return Base::Intrinsics::load(Base::mValue); }
  T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
  T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
  T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
  T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }

 private:
  AtomicBaseIncDec(const AtomicBaseIncDec& aCopy) = delete;
};
}  // namespace detail

/**
 * A wrapper for a type that enforces that all memory accesses are atomic.
 *
 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 * its place. Implementations for integral and pointer types are provided
 * below, as are specializations for enum and boolean types.
 *
 * Atomic accesses are sequentially consistent by default. You should
 * use the default unless you are tall enough to ride the
 * memory-ordering roller coaster (if you're not sure, you aren't) and
 * you have a compelling reason to do otherwise.
 *
 * There is one exception to the case of atomic memory accesses: providing an
 * initial value of the atomic value is not guaranteed to be atomic. This is a
 * deliberate design choice that enables static atomic variables to be declared
 * without introducing extra static constructors.
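 *
 * A minimal usage sketch (hypothetical names):
 *
 *   static Atomic<uint32_t> sCount;      // SequentiallyConsistent
 *   static Atomic<bool, Relaxed> sFlag;  // explicitly Relaxed
 *   void Touch() {
 *     ++sCount;      // atomic increment
 *     sFlag = true;  // atomic store
 *   }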
 */
template <typename T, MemoryOrdering Order = SequentiallyConsistent,
          typename Enable = void>
class Atomic;

/**
 * Atomic<T> implementation for integral types.
 *
 * In addition to atomic store and load operations, compound assignment and
 * increment/decrement operators are implemented which perform the
 * corresponding read-modify-write operation atomically. Finally, an atomic
 * swap method is provided.
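 *
 * For example (a sketch; the variable is hypothetical):
 *
 *   static Atomic<uint32_t> sBits;
 *   sBits |= 0x4;          // atomic fetch-or
 *   sBits += 10;           // atomic fetch-add
 *   uint32_t now = sBits;  // atomic load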
 */
template <typename T, MemoryOrdering Order>
class Atomic<
    T, Order,
    std::enable_if_t<std::is_integral_v<T> && !std::is_same_v<T, bool>>>
    : public detail::AtomicBaseIncDec<T, Order> {
  typedef typename detail::AtomicBaseIncDec<T, Order> Base;

 public:
  constexpr Atomic() : Base() {}
  explicit constexpr Atomic(T aInit) : Base(aInit) {}

  using Base::operator=;

  T operator+=(T aDelta) {
    return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
  }

  T operator-=(T aDelta) {
    return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
  }

  T operator|=(T aVal) {
    return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
  }

  T operator^=(T aVal) {
    return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
  }

  T operator&=(T aVal) {
    return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
  }

 private:
  Atomic(Atomic& aOther) = delete;
};

/**
 * Atomic<T> implementation for pointer types.
 *
 * An atomic compare-and-swap primitive for pointer variables is provided, as
 * are atomic increment and decrement operators. Also provided are the compound
 * assignment operators for addition and subtraction. Atomic swap (via
 * exchange()) is included as well.
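 *
 * A short sketch (hypothetical names):
 *
 *   static Atomic<int*> sCursor;
 *   sCursor = sBuffer;   // atomic store
 *   sCursor += 4;        // atomically advance by four ints
 *   int* p = sCursor;    // atomic load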
 */
template <typename T, MemoryOrdering Order>
class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order> {
  typedef typename detail::AtomicBaseIncDec<T*, Order> Base;

 public:
  constexpr Atomic() : Base() {}
  explicit constexpr Atomic(T* aInit) : Base(aInit) {}

  using Base::operator=;

  T* operator+=(ptrdiff_t aDelta) {
    return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
  }

  T* operator-=(ptrdiff_t aDelta) {
    return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
  }

 private:
  Atomic(Atomic& aOther) = delete;
};

/**
 * Atomic<T> implementation for enum types.
 *
 * The atomic store and load operations and the atomic swap method are
 * provided.
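 *
 * A sketch (the enum is hypothetical):
 *
 *   enum class Phase : uint32_t { Init, Running, Done };
 *   static Atomic<Phase> sPhase(Phase::Init);
 *   sPhase = Phase::Running;              // atomic store
 *   bool done = (sPhase == Phase::Done);  // atomic load, then compare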
 */
template <typename T, MemoryOrdering Order>
class Atomic<T, Order, std::enable_if_t<std::is_enum_v<T>>>
    : public detail::AtomicBase<T, Order> {
  typedef typename detail::AtomicBase<T, Order> Base;

 public:
  constexpr Atomic() : Base() {}
  explicit constexpr Atomic(T aInit) : Base(aInit) {}

  operator T() const { return T(Base::Intrinsics::load(Base::mValue)); }

  using Base::operator=;

 private:
  Atomic(Atomic& aOther) = delete;
};

/**
 * Atomic<T> implementation for boolean types.
 *
 * The atomic store and load operations and the atomic swap method are
 * provided.
 *
 * Note:
 *
 * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
 *   bool and/or some implementations of std::atomic. This is allowed in
 *   [atomic.types.generic]p9.
 *
 * - It's not obvious whether the 8-bit atomic functions on Windows are always
 *   inlined or not. If they are not inlined, the corresponding functions in the
 *   runtime library are not available on Windows XP. This is why we implement
 *   Atomic<bool> with an underlying type of uint32_t.
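 *
 * A sketch (hypothetical names):
 *
 *   static Atomic<bool> sShutdown(false);
 *   void RequestShutdown() { sShutdown = true; }  // atomic store
 *   bool IsShuttingDown() { return sShutdown; }   // atomic load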
 */
template <MemoryOrdering Order>
class Atomic<bool, Order> : protected detail::AtomicBase<uint32_t, Order> {
  typedef typename detail::AtomicBase<uint32_t, Order> Base;

 public:
  constexpr Atomic() : Base() {}
  explicit constexpr Atomic(bool aInit) : Base(aInit) {}

  // We provide boolean wrappers for the underlying AtomicBase methods.
  MOZ_IMPLICIT operator bool() const {
    return Base::Intrinsics::load(Base::mValue);
  }

  bool operator=(bool aVal) { return Base::operator=(aVal); }

  bool exchange(bool aVal) { return Base::exchange(aVal); }

  bool compareExchange(bool aOldValue, bool aNewValue) {
    return Base::compareExchange(aOldValue, aNewValue);
  }

 private:
  Atomic(Atomic& aOther) = delete;
};

}  // namespace mozilla

namespace std {

// If you want to atomically swap two atomic values, use exchange().
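//
// Each exchange() below is individually atomic, though the sequence as
// a whole is not one atomic operation (hypothetical names):
//
//   mozilla::Atomic<uint32_t> a(1), b(2);
//   uint32_t oldA = a.exchange(b);  // a becomes 2, oldA is 1
//   b = oldA;                       // b becomes 1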
template <typename T, mozilla::MemoryOrdering Order>
void swap(mozilla::Atomic<T, Order>&, mozilla::Atomic<T, Order>&) = delete;

}  // namespace std

#endif /* mozilla_Atomics_h */