mfbt/Atomics.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * Implements (almost always) lock-free atomic operations. The operations here
   9  * are a subset of that which can be found in C++11's <atomic> header, with a
  10  * different API to enforce consistent memory ordering constraints.
  11  *
  12  * Anyone caught using |volatile| for inter-thread memory safety needs to be
  13  * sent a copy of this header and the C++11 standard.
  14  */
  15
  16 #ifndef mozilla_Atomics_h
  17 #define mozilla_Atomics_h
  18
  19 #include "mozilla/Assertions.h"
  20 #include "mozilla/Attributes.h"
  21 #include "mozilla/Compiler.h"
  22 #include "mozilla/TypeTraits.h"
  23
  24 #include <stdint.h>
  25
  26 /*
  27  * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
  28  * does not have <atomic>.  So be sure to check for <atomic> support
  29  * along with C++0x support.
  30  */
  31 #if defined(__clang__) || defined(__GNUC__)
  32    /*
  33     * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
  34     * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline
  35     * definitions for unspecialized std::atomic and causes linking errors.
  36     * Therefore, we require at least 4.7.0 for using libstdc++.
  37     *
  38     * libc++ <atomic> is only functional with clang.
  39     */
  40 #  if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
  41 #    define MOZ_HAVE_CXX11_ATOMICS
  42 #  elif MOZ_USING_LIBCXX && defined(__clang__)
  43 #    define MOZ_HAVE_CXX11_ATOMICS
  44 #  endif
  45 /*
  46  * Although Visual Studio 2012's CRT supports <atomic>, its atomic load
  47  * implementation unnecessarily uses an atomic intrinsic for the less
  48  * restrictive memory orderings, which can be prohibitively expensive.
  49  * Therefore, we require at least Visual Studio 2013 for using the CRT
  50  * (bug 1061764).
  51  */
  52 #elif defined(_MSC_VER) && _MSC_VER >= 1800
  53 #  if defined(DEBUG)
  54      /*
  55       * Provide our own failure code since we're having trouble linking to
  56       * std::_Debug_message (bug 982310).
  57       */
  58 #    define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order")
  59 #  endif
  60 #  define MOZ_HAVE_CXX11_ATOMICS
  61 #endif
  62
  63 namespace mozilla {
  64
  65 /**
  66  * An enum of memory ordering possibilities for atomics.
  67  *
  68  * Memory ordering is the observable state of distinct values in memory.
  69  * (It's a separate concept from atomicity, which concerns whether an
  70  * operation can ever be observed in an intermediate state.  Don't
  71  * conflate the two!)  Given a sequence of operations in source code on
  72  * memory, it is *not* always the case that, at all times and on all
  73  * cores, those operations will appear to have occurred in that exact
  74  * sequence.  First, the compiler might reorder that sequence, if it
  75  * thinks another ordering will be more efficient.  Second, the CPU may
  76  * not expose so consistent a view of memory.  CPUs will often perform
  77  * their own instruction reordering, above and beyond that performed by
  78  * the compiler.  And each core has its own memory caches, and accesses
  79  * (reads and writes both) to "memory" may only resolve to out-of-date
  80  * cache entries -- not to the "most recently" performed operation in
  81  * some global sense.  Any access to a value that may be used by
  82  * multiple threads, potentially across multiple cores, must therefore
  83  * have a memory ordering imposed on it, for all code on all
  84  * threads/cores to have a sufficiently coherent worldview.
  85  *
  86  * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
  87  * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
  88  * detail on all this, including examples of how each mode works.
  89  *
  90  * Note that for simplicity and practicality, not all of the modes in
  91  * C++11 are supported.  The missing C++11 modes are either subsumed by
  92  * the modes we provide below, or not relevant for the CPUs we support
  93  * in Gecko.  These three modes are confusing enough as it is!
  94  */
  95 enum MemoryOrdering {
  96   /*
  97    * Relaxed ordering is the simplest memory ordering: none at all.
  98    * When the result of a write is observed, nothing may be inferred
  99    * about other memory.  Writes ostensibly performed "before" on the
 100    * writing thread may not yet be visible.  Writes performed "after" on
 101    * the writing thread may already be visible, if the compiler or CPU
 102    * reordered them.  (The latter can happen if reads and/or writes get
 103    * held up in per-processor caches.)  Relaxed ordering means
 104    * operations can always use cached values (as long as the actual
 105    * updates to atomic values actually occur, correctly, eventually), so
 106    * it's usually the fastest sort of atomic access.  For this reason,
 107    * *it's also the most dangerous kind of access*.
 108    *
 109    * Relaxed ordering is good for things like process-wide statistics
 110    * counters that don't need to be consistent with anything else, so
 111    * long as updates themselves are atomic.  (And so long as any
 112    * observations of that value can tolerate being out-of-date -- if you
 113    * need some sort of up-to-date value, you need some sort of other
 114    * synchronizing operation.)  It's *not* good for locks, mutexes,
 115    * reference counts, etc. that mediate access to other memory, or must
 116    * be observably consistent with other memory.
 117    *
 118    * x86 architectures don't take advantage of the optimization
 119    * opportunities that relaxed ordering permits.  Thus it's possible
 120    * that using relaxed ordering will "work" on x86 but fail elsewhere
 121    * (ARM, say, which *does* implement non-sequentially-consistent
 122    * relaxed ordering semantics).  Be extra-careful using relaxed
 123    * ordering if you can't easily test non-x86 architectures!
 124    */
 125   Relaxed,
 126
 127   /*
 128    * When an atomic value is updated with ReleaseAcquire ordering, and
 129    * that new value is observed with ReleaseAcquire ordering, prior
 130    * writes (atomic or not) are also observable.  What ReleaseAcquire
 131    * *doesn't* give you is any observable ordering guarantees for
 132    * ReleaseAcquire-ordered operations on different objects.  For
 133    * example, if there are two cores that each perform ReleaseAcquire
 134    * operations on separate objects, each core may or may not observe
 135    * the operations made by the other core.  The only way the cores can
 136    * be synchronized with ReleaseAcquire is if they both
 137    * ReleaseAcquire-access the same object.  This implies that you can't
 138    * necessarily describe some global total ordering of ReleaseAcquire
 139    * operations.
 140    *
 141    * ReleaseAcquire ordering is good for (as the name implies) atomic
 142    * operations on values controlling ownership of things: reference
 143    * counts, mutexes, and the like.  However, if you are thinking about
 144    * using these to implement your own locks or mutexes, you should take
 145    * a good, hard look at actual lock or mutex primitives first.
 146    */
 147   ReleaseAcquire,
 148
 149   /*
 150    * When an atomic value is updated with SequentiallyConsistent
 151    * ordering, all writes are observable when the update is observed, just
 152    * as with ReleaseAcquire ordering.  But, furthermore, a global total
 153    * ordering of SequentiallyConsistent operations *can* be described.
 154    * For example, if two cores perform SequentiallyConsistent operations
 155    * on separate objects, one core will observably perform its update
 156    * (and all previous operations will have completed), then the other
 157    * core will observably perform its update (and all previous
 158    * operations will have completed).  (Although those previous
 159    * operations aren't themselves ordered -- they could be intermixed,
 160    * or ordered if they occur on atomic values with ordering
 161    * requirements.)  SequentiallyConsistent is the *simplest and safest*
 162    * ordering of atomic operations -- it's always as if one operation
 163    * happens, then another, then another, in some order -- and every
 164    * core observes updates to happen in that single order.  Because it
 165    * has the most synchronization requirements, operations ordered this
 166    * way also tend to be slowest.
 167    *
 168    * SequentiallyConsistent ordering can be desirable when multiple
 169    * threads observe objects, and they all have to agree on the
 170    * observable order of changes to them.  People expect
 171    * SequentiallyConsistent ordering, even if they shouldn't, when
 172    * writing code, atomic or otherwise.  SequentiallyConsistent is also
 173    * the ordering of choice when designing lockless data structures.  If
 174    * you don't know what order to use, use this one.
 175    */
 176   SequentiallyConsistent,
 177 };
 178
 179 } // namespace mozilla
 180
 181 // Build up the underlying intrinsics.
 182 #ifdef MOZ_HAVE_CXX11_ATOMICS
 183
 184 #  include <atomic>
 185
 186 namespace mozilla {
 187 namespace detail {
 188
 189 /*
 190  * We provide CompareExchangeFailureOrder to work around a bug in some
 191  * versions of GCC's <atomic> header.  See bug 898491.
 192  */
 193 template<MemoryOrdering Order> struct AtomicOrderConstraints;
 194
 195 template<>
 196 struct AtomicOrderConstraints<Relaxed>
 197 {
 198   static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
 199   static const std::memory_order LoadOrder = std::memory_order_relaxed;
 200   static const std::memory_order StoreOrder = std::memory_order_relaxed;
 201   static const std::memory_order CompareExchangeFailureOrder =
 202     std::memory_order_relaxed;
 203 };
 204
 205 template<>
 206 struct AtomicOrderConstraints<ReleaseAcquire>
 207 {
 208   static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
 209   static const std::memory_order LoadOrder = std::memory_order_acquire;
 210   static const std::memory_order StoreOrder = std::memory_order_release;
 211   static const std::memory_order CompareExchangeFailureOrder =
 212     std::memory_order_acquire;
 213 };
 214
 215 template<>
 216 struct AtomicOrderConstraints<SequentiallyConsistent>
 217 {
 218   static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
 219   static const std::memory_order LoadOrder = std::memory_order_seq_cst;
 220   static const std::memory_order StoreOrder = std::memory_order_seq_cst;
 221   static const std::memory_order CompareExchangeFailureOrder =
 222     std::memory_order_seq_cst;
 223 };
 224
 225 template<typename T, MemoryOrdering Order>
 226 struct IntrinsicBase
 227 {
 228   typedef std::atomic<T> ValueType;
 229   typedef AtomicOrderConstraints<Order> OrderedOp;
 230 };
 231
 232 template<typename T, MemoryOrdering Order>
 233 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
 234 {
 235   typedef IntrinsicBase<T, Order> Base;
 236
 237   static T load(const typename Base::ValueType& aPtr)
 238   {
 239     return aPtr.load(Base::OrderedOp::LoadOrder);
 240   }
 241
 242   static void store(typename Base::ValueType& aPtr, T aVal)
 243   {
 244     aPtr.store(aVal, Base::OrderedOp::StoreOrder);
 245   }
 246
 247   static T exchange(typename Base::ValueType& aPtr, T aVal)
 248   {
 249     return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
 250   }
 251
 252   static bool compareExchange(typename Base::ValueType& aPtr,
 253                               T aOldVal, T aNewVal)
 254   {
 255     return aPtr.compare_exchange_strong(aOldVal, aNewVal,
 256                                         Base::OrderedOp::AtomicRMWOrder,
 257                                         Base::OrderedOp::CompareExchangeFailureOrder);
 258   }
 259 };
 260
 261 template<typename T, MemoryOrdering Order>
 262 struct IntrinsicAddSub : public IntrinsicBase<T, Order>
 263 {
 264   typedef IntrinsicBase<T, Order> Base;
 265
 266   static T add(typename Base::ValueType& aPtr, T aVal)
 267   {
 268     return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
 269   }
 270
 271   static T sub(typename Base::ValueType& aPtr, T aVal)
 272   {
 273     return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
 274   }
 275 };
 276
 277 template<typename T, MemoryOrdering Order>
 278 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
 279 {
 280   typedef IntrinsicBase<T*, Order> Base;
 281
 282   static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal)
 283   {
 284     return aPtr.fetch_add(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
 285   }
 286
 287   static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal)
 288   {
 289     return aPtr.fetch_sub(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
 290   }
 291 private:
 292   /*
 293    * GCC 4.6's <atomic> header has a bug where adding X to an
 294    * atomic<T*> is not the same as adding X to a T*.  Hence the need
 295    * for this function to provide the correct addend.
 296    */
 297   static ptrdiff_t fixupAddend(ptrdiff_t aVal)
 298   {
 299 #if defined(__clang__) || defined(_MSC_VER)
 300     return aVal;
 301 #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
 302     !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
 303     return aVal * sizeof(T);
 304 #else
 305     return aVal;
 306 #endif
 307   }
 308 };
 309
 310 template<typename T, MemoryOrdering Order>
 311 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
 312 {
 313   typedef IntrinsicBase<T, Order> Base;
 314
 315   static T inc(typename Base::ValueType& aPtr)
 316   {
 317     return IntrinsicAddSub<T, Order>::add(aPtr, 1);
 318   }
 319
 320   static T dec(typename Base::ValueType& aPtr)
 321   {
 322     return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
 323   }
 324 };
 325
 326 template<typename T, MemoryOrdering Order>
 327 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 328                           public IntrinsicIncDec<T, Order>
 329 {
 330   typedef IntrinsicBase<T, Order> Base;
 331
 332   static T or_(typename Base::ValueType& aPtr, T aVal)
 333   {
 334     return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
 335   }
 336
 337   static T xor_(typename Base::ValueType& aPtr, T aVal)
 338   {
 339     return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
 340   }
 341
 342   static T and_(typename Base::ValueType& aPtr, T aVal)
 343   {
 344     return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
 345   }
 346 };
 347
 348 template<typename T, MemoryOrdering Order>
 349 struct AtomicIntrinsics<T*, Order>
 350   : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
 351 {
 352 };
 353
 354 } // namespace detail
 355 } // namespace mozilla
 356
 357 #elif defined(__GNUC__)
 358
 359 namespace mozilla {
 360 namespace detail {
 361
 362 /*
 363  * The __sync_* family of intrinsics is documented here:
 364  *
 365  * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
 366  *
 367  * While these intrinsics are deprecated in favor of the newer __atomic_*
 368  * family of intrinsics:
 369  *
 370  * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
 371  *
 372  * any GCC version that supports the __atomic_* intrinsics will also support
 373  * the <atomic> header and so will be handled above.  We provide a version of
 374  * atomics using the __sync_* intrinsics to support older versions of GCC.
 375  *
 376  * All __sync_* intrinsics that we use below act as full memory barriers, for
 377  * both compiler and hardware reordering, except for __sync_lock_test_and_set,
 378  * which is only an acquire barrier.  When we call __sync_lock_test_and_set,
 379  * we add a barrier above it as appropriate.
 380  */
 381
 382 template<MemoryOrdering Order> struct Barrier;
 383
 384 /*
 385  * Some processors (in particular, x86) don't require quite so many calls to
 386  * __sync_sychronize as our specializations of Barrier produce.  If
 387  * performance turns out to be an issue, defining these specializations
 388  * on a per-processor basis would be a good first tuning step.
 389  */
 390
 391 template<>
 392 struct Barrier<Relaxed>
 393 {
 394   static void beforeLoad() {}
 395   static void afterLoad() {}
 396   static void beforeStore() {}
 397   static void afterStore() {}
 398 };
 399
 400 template<>
 401 struct Barrier<ReleaseAcquire>
 402 {
 403   static void beforeLoad() {}
 404   static void afterLoad() { __sync_synchronize(); }
 405   static void beforeStore() { __sync_synchronize(); }
 406   static void afterStore() {}
 407 };
 408
 409 template<>
 410 struct Barrier<SequentiallyConsistent>
 411 {
 412   static void beforeLoad() { __sync_synchronize(); }
 413   static void afterLoad() { __sync_synchronize(); }
 414   static void beforeStore() { __sync_synchronize(); }
 415   static void afterStore() { __sync_synchronize(); }
 416 };
 417
 418 template<typename T, MemoryOrdering Order>
 419 struct IntrinsicMemoryOps
 420 {
 421   static T load(const T& aPtr)
 422   {
 423     Barrier<Order>::beforeLoad();
 424     T val = aPtr;
 425     Barrier<Order>::afterLoad();
 426     return val;
 427   }
 428
 429   static void store(T& aPtr, T aVal)
 430   {
 431     Barrier<Order>::beforeStore();
 432     aPtr = aVal;
 433     Barrier<Order>::afterStore();
 434   }
 435
 436   static T exchange(T& aPtr, T aVal)
 437   {
 438     // __sync_lock_test_and_set is only an acquire barrier; loads and stores
 439     // can't be moved up from after to before it, but they can be moved down
 440     // from before to after it.  We may want a stricter ordering, so we need
 441     // an explicit barrier.
 442     Barrier<Order>::beforeStore();
 443     return __sync_lock_test_and_set(&aPtr, aVal);
 444   }
 445
 446   static bool compareExchange(T& aPtr, T aOldVal, T aNewVal)
 447   {
 448     return __sync_bool_compare_and_swap(&aPtr, aOldVal, aNewVal);
 449   }
 450 };
 451
 452 template<typename T>
 453 struct IntrinsicAddSub
 454 {
 455   typedef T ValueType;
 456
 457   static T add(T& aPtr, T aVal)
 458   {
 459     return __sync_fetch_and_add(&aPtr, aVal);
 460   }
 461
 462   static T sub(T& aPtr, T aVal)
 463   {
 464     return __sync_fetch_and_sub(&aPtr, aVal);
 465   }
 466 };
 467
 468 template<typename T>
 469 struct IntrinsicAddSub<T*>
 470 {
 471   typedef T* ValueType;
 472
 473   /*
 474    * The reinterpret_casts are needed so that
 475    * __sync_fetch_and_{add,sub} will properly type-check.
 476    *
 477    * Also, these functions do not provide standard semantics for
 478    * pointer types, so we need to adjust the addend.
 479    */
 480   static ValueType add(ValueType& aPtr, ptrdiff_t aVal)
 481   {
 482     ValueType amount = reinterpret_cast<ValueType>(aVal * sizeof(T));
 483     return __sync_fetch_and_add(&aPtr, amount);
 484   }
 485
 486   static ValueType sub(ValueType& aPtr, ptrdiff_t aVal)
 487   {
 488     ValueType amount = reinterpret_cast<ValueType>(aVal * sizeof(T));
 489     return __sync_fetch_and_sub(&aPtr, amount);
 490   }
 491 };
 492
 493 template<typename T>
 494 struct IntrinsicIncDec : public IntrinsicAddSub<T>
 495 {
 496   static T inc(T& aPtr) { return IntrinsicAddSub<T>::add(aPtr, 1); }
 497   static T dec(T& aPtr) { return IntrinsicAddSub<T>::sub(aPtr, 1); }
 498 };
 499
 500 template<typename T, MemoryOrdering Order>
 501 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 502                           public IntrinsicIncDec<T>
 503 {
 504   static T or_( T& aPtr, T aVal) { return __sync_fetch_and_or(&aPtr, aVal); }
 505   static T xor_(T& aPtr, T aVal) { return __sync_fetch_and_xor(&aPtr, aVal); }
 506   static T and_(T& aPtr, T aVal) { return __sync_fetch_and_and(&aPtr, aVal); }
 507 };
 508
 509 template<typename T, MemoryOrdering Order>
 510 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
 511                                      public IntrinsicIncDec<T*>
 512 {
 513 };
 514
 515 } // namespace detail
 516 } // namespace mozilla
 517
 518 #elif defined(_MSC_VER)
 519
 520 /*
 521  * Windows comes with a full complement of atomic operations.
 522  * Unfortunately, most of those aren't available for Windows XP (even if
 523  * the compiler supports intrinsics for them), which is the oldest
 524  * version of Windows we support.  Therefore, we only provide operations
 525  * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
 526  * versions, we support 64-bit datatypes as well.
 527  */
 528
 529 #  include <intrin.h>
 530
 531 #  pragma intrinsic(_InterlockedExchangeAdd)
 532 #  pragma intrinsic(_InterlockedOr)
 533 #  pragma intrinsic(_InterlockedXor)
 534 #  pragma intrinsic(_InterlockedAnd)
 535 #  pragma intrinsic(_InterlockedExchange)
 536 #  pragma intrinsic(_InterlockedCompareExchange)
 537
 538 namespace mozilla {
 539 namespace detail {
 540
 541 #  if !defined(_M_IX86) && !defined(_M_X64)
 542      /*
 543       * The implementations below are optimized for x86ish systems.  You
 544       * will have to modify them if you are porting to Windows on a
 545       * different architecture.
 546       */
 547 #    error "Unknown CPU type"
 548 #  endif
 549
 550 /*
 551  * The PrimitiveIntrinsics template should define |Type|, the datatype of size
 552  * DataSize upon which we operate, and the following eight functions.
 553  *
 554  * static Type add(Type* aPtr, Type aVal);
 555  * static Type sub(Type* aPtr, Type aVal);
 556  * static Type or_(Type* aPtr, Type aVal);
 557  * static Type xor_(Type* aPtr, Type aVal);
 558  * static Type and_(Type* aPtr, Type aVal);
 559  *
 560  *   These functions perform the obvious operation on the value contained in
 561  *   |*aPtr| combined with |aVal| and return the value previously stored in
 562  *   |*aPtr|.
 563  *
 564  * static void store(Type* aPtr, Type aVal);
 565  *
 566  *   This function atomically stores |aVal| into |*aPtr| and must provide a full
 567  *   memory fence after the store to prevent compiler and hardware instruction
 568  *   reordering.  It should also act as a compiler barrier to prevent reads and
 569  *   writes from moving to after the store.
 570  *
 571  * static Type exchange(Type* aPtr, Type aVal);
 572  *
 573  *   This function atomically stores |aVal| into |*aPtr| and returns the
 574  *   previous contents of |*aPtr|.
 575  *
 576  * static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal);
 577  *
 578  *   This function atomically performs the following operation:
 579  *
 580  *     if (*aPtr == aOldVal) {
 581  *       *aPtr = aNewVal;
 582  *       return true;
 583  *     } else {
 584  *       return false;
 585  *     }
 586  *
 587  */
 588 template<size_t DataSize> struct PrimitiveIntrinsics;
 589
 590 template<>
 591 struct PrimitiveIntrinsics<4>
 592 {
 593   typedef long Type;
 594
 595   static Type add(Type* aPtr, Type aVal)
 596   {
 597     return _InterlockedExchangeAdd(aPtr, aVal);
 598   }
 599
 600   static Type sub(Type* aPtr, Type aVal)
 601   {
 602     /*
 603      * _InterlockedExchangeSubtract isn't available before Windows 7,
 604      * and we must support Windows XP.
 605      */
 606     return _InterlockedExchangeAdd(aPtr, -aVal);
 607   }
 608
 609   static Type or_(Type* aPtr, Type aVal)
 610   {
 611     return _InterlockedOr(aPtr, aVal);
 612   }
 613
 614   static Type xor_(Type* aPtr, Type aVal)
 615   {
 616     return _InterlockedXor(aPtr, aVal);
 617   }
 618
 619   static Type and_(Type* aPtr, Type aVal)
 620   {
 621     return _InterlockedAnd(aPtr, aVal);
 622   }
 623
 624   static void store(Type* aPtr, Type aVal)
 625   {
 626     _InterlockedExchange(aPtr, aVal);
 627   }
 628
 629   static Type exchange(Type* aPtr, Type aVal)
 630   {
 631     return _InterlockedExchange(aPtr, aVal);
 632   }
 633
 634   static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
 635   {
 636     return _InterlockedCompareExchange(aPtr, aNewVal, aOldVal) == aOldVal;
 637   }
 638 };
 639
 640 #  if defined(_M_X64)
 641
 642 #    pragma intrinsic(_InterlockedExchangeAdd64)
 643 #    pragma intrinsic(_InterlockedOr64)
 644 #    pragma intrinsic(_InterlockedXor64)
 645 #    pragma intrinsic(_InterlockedAnd64)
 646 #    pragma intrinsic(_InterlockedExchange64)
 647 #    pragma intrinsic(_InterlockedCompareExchange64)
 648
 649 template <>
 650 struct PrimitiveIntrinsics<8>
 651 {
 652   typedef __int64 Type;
 653
 654   static Type add(Type* aPtr, Type aVal)
 655   {
 656     return _InterlockedExchangeAdd64(aPtr, aVal);
 657   }
 658
 659   static Type sub(Type* aPtr, Type aVal)
 660   {
 661     /*
 662      * There is no _InterlockedExchangeSubtract64.
 663      */
 664     return _InterlockedExchangeAdd64(aPtr, -aVal);
 665   }
 666
 667   static Type or_(Type* aPtr, Type aVal)
 668   {
 669     return _InterlockedOr64(aPtr, aVal);
 670   }
 671
 672   static Type xor_(Type* aPtr, Type aVal)
 673   {
 674     return _InterlockedXor64(aPtr, aVal);
 675   }
 676
 677   static Type and_(Type* aPtr, Type aVal)
 678   {
 679     return _InterlockedAnd64(aPtr, aVal);
 680   }
 681
 682   static void store(Type* aPtr, Type aVal)
 683   {
 684     _InterlockedExchange64(aPtr, aVal);
 685   }
 686
 687   static Type exchange(Type* aPtr, Type aVal)
 688   {
 689     return _InterlockedExchange64(aPtr, aVal);
 690   }
 691
 692   static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
 693   {
 694     return _InterlockedCompareExchange64(aPtr, aNewVal, aOldVal) == aOldVal;
 695   }
 696 };
 697
 698 #  endif
 699
 700 #  pragma intrinsic(_ReadWriteBarrier)
 701
 702 template<MemoryOrdering Order> struct Barrier;
 703
 704 /*
 705  * We do not provide an afterStore method in Barrier, as Relaxed and
 706  * ReleaseAcquire orderings do not require one, and the required barrier
 707  * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
 708  */
 709
 710 template<>
 711 struct Barrier<Relaxed>
 712 {
 713   static void beforeLoad() {}
 714   static void afterLoad() {}
 715   static void beforeStore() {}
 716 };
 717
 718 template<>
 719 struct Barrier<ReleaseAcquire>
 720 {
 721   static void beforeLoad() {}
 722   static void afterLoad() { _ReadWriteBarrier(); }
 723   static void beforeStore() { _ReadWriteBarrier(); }
 724 };
 725
 726 template<>
 727 struct Barrier<SequentiallyConsistent>
 728 {
 729   static void beforeLoad() { _ReadWriteBarrier(); }
 730   static void afterLoad() { _ReadWriteBarrier(); }
 731   static void beforeStore() { _ReadWriteBarrier(); }
 732 };
 733
 734 template<typename PrimType, typename T>
 735 struct CastHelper
 736 {
 737   static PrimType toPrimType(T aVal) { return static_cast<PrimType>(aVal); }
 738   static T fromPrimType(PrimType aVal) { return static_cast<T>(aVal); }
 739 };
 740
 741 template<typename PrimType, typename T>
 742 struct CastHelper<PrimType, T*>
 743 {
 744   static PrimType toPrimType(T* aVal) { return reinterpret_cast<PrimType>(aVal); }
 745   static T* fromPrimType(PrimType aVal) { return reinterpret_cast<T*>(aVal); }
 746 };
 747
 748 template<typename T>
 749 struct IntrinsicBase
 750 {
 751   typedef T ValueType;
 752   typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
 753   typedef typename Primitives::Type PrimType;
 754   static_assert(sizeof(PrimType) == sizeof(T),
 755                 "Selection of PrimitiveIntrinsics was wrong");
 756   typedef CastHelper<PrimType, T> Cast;
 757 };
 758
 759 template<typename T, MemoryOrdering Order>
 760 struct IntrinsicMemoryOps : public IntrinsicBase<T>
 761 {
 762   typedef typename IntrinsicBase<T>::ValueType ValueType;
 763   typedef typename IntrinsicBase<T>::Primitives Primitives;
 764   typedef typename IntrinsicBase<T>::PrimType PrimType;
 765   typedef typename IntrinsicBase<T>::Cast Cast;
 766
 767   static ValueType load(const ValueType& aPtr)
 768   {
 769     Barrier<Order>::beforeLoad();
 770     ValueType val = aPtr;
 771     Barrier<Order>::afterLoad();
 772     return val;
 773   }
 774
 775   static void store(ValueType& aPtr, ValueType aVal)
 776   {
 777     // For SequentiallyConsistent, Primitives::store() will generate the
 778     // proper memory fence.  Everything else just needs a barrier before
 779     // the store.
 780     if (Order == SequentiallyConsistent) {
 781       Primitives::store(reinterpret_cast<PrimType*>(&aPtr),
 782                         Cast::toPrimType(aVal));
 783     } else {
 784       Barrier<Order>::beforeStore();
 785       aPtr = aVal;
 786     }
 787   }
 788
 789   static ValueType exchange(ValueType& aPtr, ValueType aVal)
 790   {
 791     PrimType oldval =
 792       Primitives::exchange(reinterpret_cast<PrimType*>(&aPtr),
 793                            Cast::toPrimType(aVal));
 794     return Cast::fromPrimType(oldval);
 795   }
 796
 797   static bool compareExchange(ValueType& aPtr, ValueType aOldVal,
 798                               ValueType aNewVal)
 799   {
 800     return Primitives::compareExchange(reinterpret_cast<PrimType*>(&aPtr),
 801                                        Cast::toPrimType(aOldVal),
 802                                        Cast::toPrimType(aNewVal));
 803   }
 804 };
 805
 806 template<typename T>
 807 struct IntrinsicApplyHelper : public IntrinsicBase<T>
 808 {
 809   typedef typename IntrinsicBase<T>::ValueType ValueType;
 810   typedef typename IntrinsicBase<T>::PrimType PrimType;
 811   typedef typename IntrinsicBase<T>::Cast Cast;
 812   typedef PrimType (*BinaryOp)(PrimType*, PrimType);
 813   typedef PrimType (*UnaryOp)(PrimType*);
 814
 815   static ValueType applyBinaryFunction(BinaryOp aOp, ValueType& aPtr,
 816                                        ValueType aVal)
 817   {
 818     PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
 819     PrimType primTypeVal = Cast::toPrimType(aVal);
 820     return Cast::fromPrimType(aOp(primTypePtr, primTypeVal));
 821   }
 822
 823   static ValueType applyUnaryFunction(UnaryOp aOp, ValueType& aPtr)
 824   {
 825     PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
 826     return Cast::fromPrimType(aOp(primTypePtr));
 827   }
 828 };
 829
 830 template<typename T>
 831 struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
 832 {
 833   typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
 834   typedef typename IntrinsicBase<T>::Primitives Primitives;
 835
 836   static ValueType add(ValueType& aPtr, ValueType aVal)
 837   {
 838     return applyBinaryFunction(&Primitives::add, aPtr, aVal);
 839   }
 840
 841   static ValueType sub(ValueType& aPtr, ValueType aVal)
 842   {
 843     return applyBinaryFunction(&Primitives::sub, aPtr, aVal);
 844   }
 845 };
 846
 847 template<typename T>
 848 struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
 849 {
 850   typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
 851   typedef typename IntrinsicBase<T*>::Primitives Primitives;
 852
 853   static ValueType add(ValueType& aPtr, ptrdiff_t aAmount)
 854   {
 855     return applyBinaryFunction(&Primitives::add, aPtr,
 856                                (ValueType)(aAmount * sizeof(T)));
 857   }
 858
 859   static ValueType sub(ValueType& aPtr, ptrdiff_t aAmount)
 860   {
 861     return applyBinaryFunction(&Primitives::sub, aPtr,
 862                                (ValueType)(aAmount * sizeof(T)));
 863   }
 864 };
 865
 866 template<typename T>
 867 struct IntrinsicIncDec : public IntrinsicAddSub<T>
 868 {
 869   typedef typename IntrinsicAddSub<T>::ValueType ValueType;
 870   static ValueType inc(ValueType& aPtr) { return add(aPtr, 1); }
 871   static ValueType dec(ValueType& aPtr) { return sub(aPtr, 1); }
 872 };
 873
 874 template<typename T, MemoryOrdering Order>
 875 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 876                           public IntrinsicIncDec<T>
 877 {
 878   typedef typename IntrinsicIncDec<T>::ValueType ValueType;
 879   typedef typename IntrinsicBase<T>::Primitives Primitives;
 880
 881   static ValueType or_(ValueType& aPtr, T aVal)
 882   {
 883     return applyBinaryFunction(&Primitives::or_, aPtr, aVal);
 884   }
 885
 886   static ValueType xor_(ValueType& aPtr, T aVal)
 887   {
 888     return applyBinaryFunction(&Primitives::xor_, aPtr, aVal);
 889   }
 890
 891   static ValueType and_(ValueType& aPtr, T aVal)
 892   {
 893     return applyBinaryFunction(&Primitives::and_, aPtr, aVal);
 894   }
 895 };
 896
 897 template<typename T, MemoryOrdering Order>
 898 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
 899                                      public IntrinsicIncDec<T*>
 900 {
 901   typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
 902   // This is required to make us be able to build with MSVC10, for unknown
 903   // reasons.
 904   typedef typename IntrinsicBase<T*>::Primitives Primitives;
 905 };
 906
 907 } // namespace detail
 908 } // namespace mozilla
 909
 910 #else
 911 # error "Atomic compiler intrinsics are not supported on your platform"
 912 #endif
 913
 914 namespace mozilla {
 915
 916 namespace detail {
 917
 918 template<typename T, MemoryOrdering Order>
 919 class AtomicBase
 920 {
 921   // We only support 32-bit types on 32-bit Windows, which constrains our
 922   // implementation elsewhere.  But we support pointer-sized types everywhere.
 923   static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
 924                 "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
 925
 926 protected:
 927   typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
 928   typename Intrinsics::ValueType mValue;
 929
 930 public:
 931   MOZ_CONSTEXPR AtomicBase() : mValue() {}
 932   explicit MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}
 933
 934   // Note: we can't provide operator T() here because Atomic<bool> inherits
 935   // from AtomcBase with T=uint32_t and not T=bool. If we implemented
 936   // operator T() here, it would cause errors when comparing Atomic<bool> with
 937   // a regular bool.
 938
 939   T operator=(T aVal)
 940   {
 941     Intrinsics::store(mValue, aVal);
 942     return aVal;
 943   }
 944
 945   /**
 946    * Performs an atomic swap operation.  aVal is stored and the previous
 947    * value of this variable is returned.
 948    */
 949   T exchange(T aVal)
 950   {
 951     return Intrinsics::exchange(mValue, aVal);
 952   }
 953
 954   /**
 955    * Performs an atomic compare-and-swap operation and returns true if it
 956    * succeeded. This is equivalent to atomically doing
 957    *
 958    *   if (mValue == aOldValue) {
 959    *     mValue = aNewValue;
 960    *     return true;
 961    *   } else {
 962    *     return false;
 963    *   }
 964    */
 965   bool compareExchange(T aOldValue, T aNewValue)
 966   {
 967     return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
 968   }
 969
 970 private:
 971   template<MemoryOrdering AnyOrder>
 972   AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
 973 };
 974
 975 template<typename T, MemoryOrdering Order>
 976 class AtomicBaseIncDec : public AtomicBase<T, Order>
 977 {
 978   typedef typename detail::AtomicBase<T, Order> Base;
 979
 980 public:
 981   MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
 982   explicit MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}
 983
 984   using Base::operator=;
 985
 986   operator T() const { return Base::Intrinsics::load(Base::mValue); }
 987   T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
 988   T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
 989   T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
 990   T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
 991
 992 private:
 993   template<MemoryOrdering AnyOrder>
 994   AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
 995 };
 996
 997 } // namespace detail
 998
 999 /**
1000  * A wrapper for a type that enforces that all memory accesses are atomic.
1001  *
1002  * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
1003  * its place.  Implementations for integral and pointer types are provided
1004  * below.
1005  *
1006  * Atomic accesses are sequentially consistent by default.  You should
1007  * use the default unless you are tall enough to ride the
1008  * memory-ordering roller coaster (if you're not sure, you aren't) and
1009  * you have a compelling reason to do otherwise.
1010  *
1011  * There is one exception to the case of atomic memory accesses: providing an
1012  * initial value of the atomic value is not guaranteed to be atomic.  This is a
1013  * deliberate design choice that enables static atomic variables to be declared
1014  * without introducing extra static constructors.
1015  */
1016 template<typename T,
1017          MemoryOrdering Order = SequentiallyConsistent,
1018          typename Enable = void>
1019 class Atomic;
1020
1021 /**
1022  * Atomic<T> implementation for integral types.
1023  *
1024  * In addition to atomic store and load operations, compound assignment and
1025  * increment/decrement operators are implemented which perform the
1026  * corresponding read-modify-write operation atomically.  Finally, an atomic
1027  * swap method is provided.
1028  */
1029 template<typename T, MemoryOrdering Order>
1030 class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value &&
1031                        !IsSame<T, bool>::value>::Type>
1032   : public detail::AtomicBaseIncDec<T, Order>
1033 {
1034   typedef typename detail::AtomicBaseIncDec<T, Order> Base;
1035
1036 public:
1037   MOZ_CONSTEXPR Atomic() : Base() {}
1038   explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1039
1040   using Base::operator=;
1041
1042   T operator+=(T aDelta)
1043   {
1044     return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
1045   }
1046
1047   T operator-=(T aDelta)
1048   {
1049     return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
1050   }
1051
1052   T operator|=(T aVal)
1053   {
1054     return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
1055   }
1056
1057   T operator^=(T aVal)
1058   {
1059     return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
1060   }
1061
1062   T operator&=(T aVal)
1063   {
1064     return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
1065   }
1066
1067 private:
1068   Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1069 };
1070
1071 /**
1072  * Atomic<T> implementation for pointer types.
1073  *
1074  * An atomic compare-and-swap primitive for pointer variables is provided, as
1075  * are atomic increment and decrement operators.  Also provided are the compound
1076  * assignment operators for addition and subtraction. Atomic swap (via
1077  * exchange()) is included as well.
1078  */
1079 template<typename T, MemoryOrdering Order>
1080 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
1081 {
1082   typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
1083
1084 public:
1085   MOZ_CONSTEXPR Atomic() : Base() {}
1086   explicit MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}
1087
1088   using Base::operator=;
1089
1090   T* operator+=(ptrdiff_t aDelta)
1091   {
1092     return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
1093   }
1094
1095   T* operator-=(ptrdiff_t aDelta)
1096   {
1097     return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
1098   }
1099
1100 private:
1101   Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
1102 };
1103
1104 /**
1105  * Atomic<T> implementation for enum types.
1106  *
1107  * The atomic store and load operations and the atomic swap method are provided.
1108  */
1109 template<typename T, MemoryOrdering Order>
1110 class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
1111   : public detail::AtomicBase<T, Order>
1112 {
1113   typedef typename detail::AtomicBase<T, Order> Base;
1114
1115 public:
1116   MOZ_CONSTEXPR Atomic() : Base() {}
1117   explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1118
1119   operator T() const { return Base::Intrinsics::load(Base::mValue); }
1120
1121   using Base::operator=;
1122
1123 private:
1124   Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1125 };
1126
1127 /**
1128  * Atomic<T> implementation for boolean types.
1129  *
1130  * The atomic store and load operations and the atomic swap method are provided.
1131  *
1132  * Note:
1133  *
1134  * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
1135  *   bool and/or some implementations of std::atomic. This is allowed in
1136  *   [atomic.types.generic]p9.
1137  *
1138  * - It's not obvious whether the 8-bit atomic functions on Windows are always
1139  *   inlined or not. If they are not inlined, the corresponding functions in the
1140  *   runtime library are not available on Windows XP. This is why we implement
1141  *   Atomic<bool> with an underlying type of uint32_t.
1142  */
1143 template<MemoryOrdering Order>
1144 class Atomic<bool, Order>
1145   : protected detail::AtomicBase<uint32_t, Order>
1146 {
1147   typedef typename detail::AtomicBase<uint32_t, Order> Base;
1148
1149 public:
1150   MOZ_CONSTEXPR Atomic() : Base() {}
1151   explicit MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {}
1152
1153   // We provide boolean wrappers for the underlying AtomicBase methods.
1154   operator bool() const
1155   {
1156     return Base::Intrinsics::load(Base::mValue);
1157   }
1158
1159   bool operator=(bool aVal)
1160   {
1161     return Base::operator=(aVal);
1162   }
1163
1164   bool exchange(bool aVal)
1165   {
1166     return Base::exchange(aVal);
1167   }
1168
1169   bool compareExchange(bool aOldValue, bool aNewValue)
1170   {
1171     return Base::compareExchange(aOldValue, aNewValue);
1172   }
1173
1174 private:
1175   Atomic(Atomic<bool, Order>& aOther) MOZ_DELETE;
1176 };
1177
1178 } // namespace mozilla
1179
1180 #endif /* mozilla_Atomics_h */