/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Implements (almost always) lock-free atomic operations. The operations here
 * are a subset of that which can be found in C++11's <atomic> header, with a
 * different API to enforce consistent memory ordering constraints.
 *
 * Anyone caught using |volatile| for inter-thread memory safety needs to be
 * sent a copy of this header and the C++11 standard.
 */

#ifndef mozilla_Atomics_h
#define mozilla_Atomics_h

#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/Compiler.h"
#include "mozilla/TypeTraits.h"

#include <stdint.h>

/*
 * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
 * does not have <atomic>.  So be sure to check for <atomic> support
 * along with C++0x support.
 */
#if defined(__clang__) || defined(__GNUC__)
  /*
   * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
   * loose typing of the atomic builtins.  GCC 4.5 and 4.6 lack inline
   * definitions for unspecialized std::atomic and cause linking errors.
   * Therefore, we require at least 4.7.0 for using libstdc++.
   *
   * libc++ <atomic> is only functional with clang.
   */
# if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
#  define MOZ_HAVE_CXX11_ATOMICS
# elif MOZ_USING_LIBCXX && defined(__clang__)
#  define MOZ_HAVE_CXX11_ATOMICS
# endif
#elif defined(_MSC_VER)
# define MOZ_HAVE_CXX11_ATOMICS
#endif

namespace mozilla {

/**
 * An enum of memory ordering possibilities for atomics.
 *
 * Memory ordering is the observable state of distinct values in memory.
 * (It's a separate concept from atomicity, which concerns whether an
 * operation can ever be observed in an intermediate state.  Don't
 * conflate the two!)  Given a sequence of operations in source code on
 * memory, it is *not* always the case that, at all times and on all
 * cores, those operations will appear to have occurred in that exact
 * sequence.  First, the compiler might reorder that sequence, if it
 * thinks another ordering will be more efficient.  Second, the CPU may
 * not expose so consistent a view of memory.  CPUs will often perform
 * their own instruction reordering, above and beyond that performed by
 * the compiler.  And each core has its own memory caches, and accesses
 * (reads and writes both) to "memory" may only resolve to out-of-date
 * cache entries -- not to the "most recently" performed operation in
 * some global sense.  Any access to a value that may be used by
 * multiple threads, potentially across multiple cores, must therefore
 * have a memory ordering imposed on it, for all code on all
 * threads/cores to have a sufficiently coherent worldview.
 *
 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
 * detail on all this, including examples of how each mode works.
 *
 * Note that for simplicity and practicality, not all of the modes in
 * C++11 are supported.  The missing C++11 modes are either subsumed by
 * the modes we provide below, or not relevant for the CPUs we support
 * in Gecko.  These three modes are confusing enough as it is!
 */
enum MemoryOrdering {
  /*
   * Relaxed ordering is the simplest memory ordering: none at all.
   * When the result of a write is observed, nothing may be inferred
   * about other memory.  Writes ostensibly performed "before" on the
   * writing thread may not yet be visible.  Writes performed "after" on
   * the writing thread may already be visible, if the compiler or CPU
   * reordered them.  (The latter can happen if reads and/or writes get
   * held up in per-processor caches.)  Relaxed ordering means
   * operations can always use cached values (as long as the actual
   * updates to atomic values actually occur, correctly, eventually), so
   * it's usually the fastest sort of atomic access.  For this reason,
   * *it's also the most dangerous kind of access*.
   *
   * Relaxed ordering is good for things like process-wide statistics
   * counters that don't need to be consistent with anything else, so
   * long as updates themselves are atomic.  (And so long as any
   * observations of that value can tolerate being out-of-date -- if you
   * need some sort of up-to-date value, you need some sort of other
   * synchronizing operation.)  It's *not* good for locks, mutexes,
   * reference counts, etc. that mediate access to other memory, or must
   * be observably consistent with other memory.
   *
   * x86 architectures don't take advantage of the optimization
   * opportunities that relaxed ordering permits.  Thus it's possible
   * that using relaxed ordering will "work" on x86 but fail elsewhere
   * (ARM, say, which *does* implement non-sequentially-consistent
   * relaxed ordering semantics).  Be extra-careful using relaxed
   * ordering if you can't easily test non-x86 architectures!
   */
  Relaxed,

  /*
   * When an atomic value is updated with ReleaseAcquire ordering, and
   * that new value is observed with ReleaseAcquire ordering, prior
   * writes (atomic or not) are also observable.  What ReleaseAcquire
   * *doesn't* give you is any observable ordering guarantees for
   * ReleaseAcquire-ordered operations on different objects.  For
   * example, if there are two cores that each perform ReleaseAcquire
   * operations on separate objects, each core may or may not observe
   * the operations made by the other core.  The only way the cores can
   * be synchronized with ReleaseAcquire is if they both
   * ReleaseAcquire-access the same object.  This implies that you can't
   * necessarily describe some global total ordering of ReleaseAcquire
   * operations.
   *
   * ReleaseAcquire ordering is good for (as the name implies) atomic
   * operations on values controlling ownership of things: reference
   * counts, mutexes, and the like.  However, if you are thinking about
   * using these to implement your own locks or mutexes, you should take
   * a good, hard look at actual lock or mutex primitives first.
   */
  ReleaseAcquire,

  /*
   * When an atomic value is updated with SequentiallyConsistent
   * ordering, all writes are observable when the update is observed,
   * just as with ReleaseAcquire ordering.  But, furthermore, a global
   * total ordering of SequentiallyConsistent operations *can* be
   * described.  For example, if two cores perform SequentiallyConsistent
   * operations on separate objects, one core will observably perform
   * its update (and all previous operations will have completed), then
   * the other core will observably perform its update (and all previous
   * operations will have completed).  (Although those previous
   * operations aren't themselves ordered -- they could be intermixed,
   * or ordered if they occur on atomic values with ordering
   * requirements.)  SequentiallyConsistent is the *simplest and safest*
   * ordering of atomic operations -- it's always as if one operation
   * happens, then another, then another, in some order -- and every
   * core observes updates to happen in that single order.  Because it
   * has the most synchronization requirements, operations ordered this
   * way also tend to be slowest.
   *
   * SequentiallyConsistent ordering can be desirable when multiple
   * threads observe objects, and they all have to agree on the
   * observable order of changes to them.  People expect
   * SequentiallyConsistent ordering, even if they shouldn't, when
   * writing code, atomic or otherwise.  SequentiallyConsistent is also
   * the ordering of choice when designing lockless data structures.  If
   * you don't know what order to use, use this one.
   */
  SequentiallyConsistent,
};
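
/*
 * Illustrative sketch of how these orderings are typically chosen (uses the
 * Atomic<> wrapper defined later in this header; all names are hypothetical):
 *
 *   // A statistics-only counter: updates must be atomic, but no other memory
 *   // is synchronized through it, so Relaxed is enough.
 *   static Atomic<uint32_t, Relaxed> sPageLoadCount;
 *
 *   // Publishing data to another thread: the writer fills in the object and
 *   // then sets the flag; a reader that observes the flag set, with
 *   // ReleaseAcquire ordering on both sides, also observes the prior writes.
 *   static Foo* sSharedFoo;
 *   static Atomic<bool, ReleaseAcquire> sFooReady;
 *
 *   // When in doubt, take the default (SequentiallyConsistent):
 *   static Atomic<uint32_t> sGeneration;
 */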

} // namespace mozilla

// Build up the underlying intrinsics.
#ifdef MOZ_HAVE_CXX11_ATOMICS

# include <atomic>

namespace mozilla {
namespace detail {

/*
 * We provide CompareExchangeFailureOrder to work around a bug in some
 * versions of GCC's <atomic> header.  See bug 898491.
 */
template<MemoryOrdering Order> struct AtomicOrderConstraints;

template<>
struct AtomicOrderConstraints<Relaxed>
{
  static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
  static const std::memory_order LoadOrder = std::memory_order_relaxed;
  static const std::memory_order StoreOrder = std::memory_order_relaxed;
  static const std::memory_order CompareExchangeFailureOrder =
    std::memory_order_relaxed;
};

template<>
struct AtomicOrderConstraints<ReleaseAcquire>
{
  static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
  static const std::memory_order LoadOrder = std::memory_order_acquire;
  static const std::memory_order StoreOrder = std::memory_order_release;
  static const std::memory_order CompareExchangeFailureOrder =
    std::memory_order_acquire;
};

template<>
struct AtomicOrderConstraints<SequentiallyConsistent>
{
  static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
  static const std::memory_order LoadOrder = std::memory_order_seq_cst;
  static const std::memory_order StoreOrder = std::memory_order_seq_cst;
  static const std::memory_order CompareExchangeFailureOrder =
    std::memory_order_seq_cst;
};

template<typename T, MemoryOrdering Order>
struct IntrinsicBase
{
  typedef std::atomic<T> ValueType;
  typedef AtomicOrderConstraints<Order> OrderedOp;
};

template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
{
  typedef IntrinsicBase<T, Order> Base;

  static T load(const typename Base::ValueType& aPtr)
  {
    return aPtr.load(Base::OrderedOp::LoadOrder);
  }

  static void store(typename Base::ValueType& aPtr, T aVal)
  {
    aPtr.store(aVal, Base::OrderedOp::StoreOrder);
  }

  static T exchange(typename Base::ValueType& aPtr, T aVal)
  {
    return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static bool compareExchange(typename Base::ValueType& aPtr,
                              T aOldVal, T aNewVal)
  {
    return aPtr.compare_exchange_strong(aOldVal, aNewVal,
                                        Base::OrderedOp::AtomicRMWOrder,
                                        Base::OrderedOp::CompareExchangeFailureOrder);
  }
};

template<typename T, MemoryOrdering Order>
struct IntrinsicAddSub : public IntrinsicBase<T, Order>
{
  typedef IntrinsicBase<T, Order> Base;

  static T add(typename Base::ValueType& aPtr, T aVal)
  {
    return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T sub(typename Base::ValueType& aPtr, T aVal)
  {
    return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
  }
};

template<typename T, MemoryOrdering Order>
struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
{
  typedef IntrinsicBase<T*, Order> Base;

  static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal)
  {
    return aPtr.fetch_add(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
  }

  static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal)
  {
    return aPtr.fetch_sub(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
  }

private:
  /*
   * GCC 4.6's <atomic> header has a bug where adding X to an
   * atomic<T*> is not the same as adding X to a T*.  Hence the need
   * for this function to provide the correct addend.
   */
  static ptrdiff_t fixupAddend(ptrdiff_t aVal)
  {
#if defined(__clang__) || defined(_MSC_VER)
    return aVal;
#elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
      !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
    return aVal * sizeof(T);
#else
    return aVal;
#endif
  }
};

template<typename T, MemoryOrdering Order>
struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
{
  typedef IntrinsicBase<T, Order> Base;

  static T inc(typename Base::ValueType& aPtr)
  {
    return IntrinsicAddSub<T, Order>::add(aPtr, 1);
  }

  static T dec(typename Base::ValueType& aPtr)
  {
    return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
  }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T, Order>
{
  typedef IntrinsicBase<T, Order> Base;

  static T or_(typename Base::ValueType& aPtr, T aVal)
  {
    return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T xor_(typename Base::ValueType& aPtr, T aVal)
  {
    return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T and_(typename Base::ValueType& aPtr, T aVal)
  {
    return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
  }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order>
  : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
{
};

} // namespace detail
} // namespace mozilla

#elif defined(__GNUC__)

namespace mozilla {
namespace detail {

/*
 * The __sync_* family of intrinsics is documented here:
 *
 * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
 *
 * While these intrinsics are deprecated in favor of the newer __atomic_*
 * family of intrinsics:
 *
 * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
 *
 * any GCC version that supports the __atomic_* intrinsics will also support
 * the <atomic> header and so will be handled above.  We provide a version of
 * atomics using the __sync_* intrinsics to support older versions of GCC.
 *
 * All __sync_* intrinsics that we use below act as full memory barriers, for
 * both compiler and hardware reordering, except for __sync_lock_test_and_set,
 * which is only an acquire barrier.  When we call __sync_lock_test_and_set,
 * we add a barrier above it as appropriate.
 */

template<MemoryOrdering Order> struct Barrier;

/*
 * Some processors (in particular, x86) don't require quite so many calls to
 * __sync_synchronize as our specializations of Barrier produce.  If
 * performance turns out to be an issue, defining these specializations
 * on a per-processor basis would be a good first tuning step.
 */

template<>
struct Barrier<Relaxed>
{
  static void beforeLoad() {}
  static void afterLoad() {}
  static void beforeStore() {}
  static void afterStore() {}
};

template<>
struct Barrier<ReleaseAcquire>
{
  static void beforeLoad() {}
  static void afterLoad() { __sync_synchronize(); }
  static void beforeStore() { __sync_synchronize(); }
  static void afterStore() {}
};

template<>
struct Barrier<SequentiallyConsistent>
{
  static void beforeLoad() { __sync_synchronize(); }
  static void afterLoad() { __sync_synchronize(); }
  static void beforeStore() { __sync_synchronize(); }
  static void afterStore() { __sync_synchronize(); }
};
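
/*
 * Illustrative sketch (not compiled): with the specializations above, a
 * ReleaseAcquire store and load in IntrinsicMemoryOps below expand roughly to
 *
 *   __sync_synchronize();   // Barrier<ReleaseAcquire>::beforeStore()
 *   aPtr = aVal;            // plain store; afterStore() is a no-op
 *
 *   T val = aPtr;           // plain load; beforeLoad() is a no-op
 *   __sync_synchronize();   // Barrier<ReleaseAcquire>::afterLoad()
 *
 * while SequentiallyConsistent places a full barrier on both sides of every
 * load and store, and Relaxed emits no barriers at all.
 */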

template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps
{
  static T load(const T& aPtr)
  {
    Barrier<Order>::beforeLoad();
    T val = aPtr;
    Barrier<Order>::afterLoad();
    return val;
  }

  static void store(T& aPtr, T aVal)
  {
    Barrier<Order>::beforeStore();
    aPtr = aVal;
    Barrier<Order>::afterStore();
  }

  static T exchange(T& aPtr, T aVal)
  {
    // __sync_lock_test_and_set is only an acquire barrier; loads and stores
    // can't be moved up from after to before it, but they can be moved down
    // from before to after it.  We may want a stricter ordering, so we need
    // an explicit barrier.
    Barrier<Order>::beforeStore();
    return __sync_lock_test_and_set(&aPtr, aVal);
  }

  static bool compareExchange(T& aPtr, T aOldVal, T aNewVal)
  {
    return __sync_bool_compare_and_swap(&aPtr, aOldVal, aNewVal);
  }
};

template<typename T>
struct IntrinsicAddSub
{
  typedef T ValueType;

  static T add(T& aPtr, T aVal)
  {
    return __sync_fetch_and_add(&aPtr, aVal);
  }

  static T sub(T& aPtr, T aVal)
  {
    return __sync_fetch_and_sub(&aPtr, aVal);
  }
};

template<typename T>
struct IntrinsicAddSub<T*>
{
  typedef T* ValueType;

  /*
   * The reinterpret_casts are needed so that
   * __sync_fetch_and_{add,sub} will properly type-check.
   *
   * Also, these functions do not provide standard semantics for
   * pointer types, so we need to adjust the addend.
   */
  static ValueType add(ValueType& aPtr, ptrdiff_t aVal)
  {
    ValueType amount = reinterpret_cast<ValueType>(aVal * sizeof(T));
    return __sync_fetch_and_add(&aPtr, amount);
  }

  static ValueType sub(ValueType& aPtr, ptrdiff_t aVal)
  {
    ValueType amount = reinterpret_cast<ValueType>(aVal * sizeof(T));
    return __sync_fetch_and_sub(&aPtr, amount);
  }
};

template<typename T>
struct IntrinsicIncDec : public IntrinsicAddSub<T>
{
  static T inc(T& aPtr) { return IntrinsicAddSub<T>::add(aPtr, 1); }
  static T dec(T& aPtr) { return IntrinsicAddSub<T>::sub(aPtr, 1); }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T>
{
  static T or_(T& aPtr, T aVal) { return __sync_fetch_and_or(&aPtr, aVal); }
  static T xor_(T& aPtr, T aVal) { return __sync_fetch_and_xor(&aPtr, aVal); }
  static T and_(T& aPtr, T aVal) { return __sync_fetch_and_and(&aPtr, aVal); }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*>
{
};

} // namespace detail
} // namespace mozilla

#elif defined(_MSC_VER)

/*
 * Windows comes with a full complement of atomic operations.
 * Unfortunately, most of those aren't available for Windows XP (even if
 * the compiler supports intrinsics for them), which is the oldest
 * version of Windows we support.  Therefore, we only provide operations
 * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
 * versions, we support 64-bit datatypes as well.
 */

# include <intrin.h>

# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedXor)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)

namespace mozilla {
namespace detail {

# if !defined(_M_IX86) && !defined(_M_X64)
    /*
     * The implementations below are optimized for x86ish systems.  You
     * will have to modify them if you are porting to Windows on a
     * different architecture.
     */
#  error "Unknown CPU type"
# endif

/*
 * The PrimitiveIntrinsics template should define |Type|, the datatype of size
 * DataSize upon which we operate, and the following eight functions.
 *
 * static Type add(Type* aPtr, Type aVal);
 * static Type sub(Type* aPtr, Type aVal);
 * static Type or_(Type* aPtr, Type aVal);
 * static Type xor_(Type* aPtr, Type aVal);
 * static Type and_(Type* aPtr, Type aVal);
 *
 * These functions perform the obvious operation on the value contained in
 * |*aPtr| combined with |aVal| and return the value previously stored in
 * |*aPtr|.
 *
 * static void store(Type* aPtr, Type aVal);
 *
 * This function atomically stores |aVal| into |*aPtr| and must provide a full
 * memory fence after the store to prevent compiler and hardware instruction
 * reordering.  It should also act as a compiler barrier to prevent reads and
 * writes from moving to after the store.
 *
 * static Type exchange(Type* aPtr, Type aVal);
 *
 * This function atomically stores |aVal| into |*aPtr| and returns the
 * previous contents of |*aPtr|.
 *
 * static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal);
 *
 * This function atomically performs the following operation:
 *
 *     if (*aPtr == aOldVal) {
 *       *aPtr = aNewVal;
 *       return true;
 *     } else {
 *       return false;
 *     }
 */

template<size_t DataSize> struct PrimitiveIntrinsics;

template<>
struct PrimitiveIntrinsics<4>
{
  typedef long Type;

  static Type add(Type* aPtr, Type aVal)
  {
    return _InterlockedExchangeAdd(aPtr, aVal);
  }

  static Type sub(Type* aPtr, Type aVal)
  {
    /*
     * _InterlockedExchangeSubtract isn't available before Windows 7,
     * and we must support Windows XP.
     */
    return _InterlockedExchangeAdd(aPtr, -aVal);
  }

  static Type or_(Type* aPtr, Type aVal)
  {
    return _InterlockedOr(aPtr, aVal);
  }

  static Type xor_(Type* aPtr, Type aVal)
  {
    return _InterlockedXor(aPtr, aVal);
  }

  static Type and_(Type* aPtr, Type aVal)
  {
    return _InterlockedAnd(aPtr, aVal);
  }

  static void store(Type* aPtr, Type aVal)
  {
    _InterlockedExchange(aPtr, aVal);
  }

  static Type exchange(Type* aPtr, Type aVal)
  {
    return _InterlockedExchange(aPtr, aVal);
  }

  static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
  {
    // _InterlockedCompareExchange takes the new value before the old one
    // (destination, exchange, comparand).
    return _InterlockedCompareExchange(aPtr, aNewVal, aOldVal) == aOldVal;
  }
};

# if defined(_M_X64)

#  pragma intrinsic(_InterlockedExchangeAdd64)
#  pragma intrinsic(_InterlockedOr64)
#  pragma intrinsic(_InterlockedXor64)
#  pragma intrinsic(_InterlockedAnd64)
#  pragma intrinsic(_InterlockedExchange64)
#  pragma intrinsic(_InterlockedCompareExchange64)

template <>
struct PrimitiveIntrinsics<8>
{
  typedef __int64 Type;

  static Type add(Type* aPtr, Type aVal)
  {
    return _InterlockedExchangeAdd64(aPtr, aVal);
  }

  static Type sub(Type* aPtr, Type aVal)
  {
    /*
     * There is no _InterlockedExchangeSubtract64.
     */
    return _InterlockedExchangeAdd64(aPtr, -aVal);
  }

  static Type or_(Type* aPtr, Type aVal)
  {
    return _InterlockedOr64(aPtr, aVal);
  }

  static Type xor_(Type* aPtr, Type aVal)
  {
    return _InterlockedXor64(aPtr, aVal);
  }

  static Type and_(Type* aPtr, Type aVal)
  {
    return _InterlockedAnd64(aPtr, aVal);
  }

  static void store(Type* aPtr, Type aVal)
  {
    _InterlockedExchange64(aPtr, aVal);
  }

  static Type exchange(Type* aPtr, Type aVal)
  {
    return _InterlockedExchange64(aPtr, aVal);
  }

  static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
  {
    return _InterlockedCompareExchange64(aPtr, aNewVal, aOldVal) == aOldVal;
  }
};

# endif

# pragma intrinsic(_ReadWriteBarrier)

template<MemoryOrdering Order> struct Barrier;

/*
 * We do not provide an afterStore method in Barrier, as Relaxed and
 * ReleaseAcquire orderings do not require one, and the required barrier
 * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
 */

template<>
struct Barrier<Relaxed>
{
  static void beforeLoad() {}
  static void afterLoad() {}
  static void beforeStore() {}
};

template<>
struct Barrier<ReleaseAcquire>
{
  static void beforeLoad() {}
  static void afterLoad() { _ReadWriteBarrier(); }
  static void beforeStore() { _ReadWriteBarrier(); }
};

template<>
struct Barrier<SequentiallyConsistent>
{
  static void beforeLoad() { _ReadWriteBarrier(); }
  static void afterLoad() { _ReadWriteBarrier(); }
  static void beforeStore() { _ReadWriteBarrier(); }
};

template<typename PrimType, typename T>
struct CastHelper
{
  static PrimType toPrimType(T aVal) { return static_cast<PrimType>(aVal); }
  static T fromPrimType(PrimType aVal) { return static_cast<T>(aVal); }
};

template<typename PrimType, typename T>
struct CastHelper<PrimType, T*>
{
  static PrimType toPrimType(T* aVal) { return reinterpret_cast<PrimType>(aVal); }
  static T* fromPrimType(PrimType aVal) { return reinterpret_cast<T*>(aVal); }
};

template<typename T>
struct IntrinsicBase
{
  typedef T ValueType;
  typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
  typedef typename Primitives::Type PrimType;
  static_assert(sizeof(PrimType) == sizeof(T),
                "Selection of PrimitiveIntrinsics was wrong");
  typedef CastHelper<PrimType, T> Cast;
};

template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps : public IntrinsicBase<T>
{
  typedef typename IntrinsicBase<T>::ValueType ValueType;
  typedef typename IntrinsicBase<T>::Primitives Primitives;
  typedef typename IntrinsicBase<T>::PrimType PrimType;
  typedef typename IntrinsicBase<T>::Cast Cast;

  static ValueType load(const ValueType& aPtr)
  {
    Barrier<Order>::beforeLoad();
    ValueType val = aPtr;
    Barrier<Order>::afterLoad();
    return val;
  }

  static void store(ValueType& aPtr, ValueType aVal)
  {
    // For SequentiallyConsistent, Primitives::store() will generate the
    // proper memory fence.  Everything else just needs a barrier before
    // the store.
    if (Order == SequentiallyConsistent) {
      Primitives::store(reinterpret_cast<PrimType*>(&aPtr),
                        Cast::toPrimType(aVal));
    } else {
      Barrier<Order>::beforeStore();
      aPtr = aVal;
    }
  }

  static ValueType exchange(ValueType& aPtr, ValueType aVal)
  {
    PrimType oldval =
      Primitives::exchange(reinterpret_cast<PrimType*>(&aPtr),
                           Cast::toPrimType(aVal));
    return Cast::fromPrimType(oldval);
  }

  static bool compareExchange(ValueType& aPtr, ValueType aOldVal,
                              ValueType aNewVal)
  {
    return Primitives::compareExchange(reinterpret_cast<PrimType*>(&aPtr),
                                       Cast::toPrimType(aOldVal),
                                       Cast::toPrimType(aNewVal));
  }
};

template<typename T>
struct IntrinsicApplyHelper : public IntrinsicBase<T>
{
  typedef typename IntrinsicBase<T>::ValueType ValueType;
  typedef typename IntrinsicBase<T>::PrimType PrimType;
  typedef typename IntrinsicBase<T>::Cast Cast;
  typedef PrimType (*BinaryOp)(PrimType*, PrimType);
  typedef PrimType (*UnaryOp)(PrimType*);

  static ValueType applyBinaryFunction(BinaryOp aOp, ValueType& aPtr,
                                       ValueType aVal)
  {
    PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
    PrimType primTypeVal = Cast::toPrimType(aVal);
    return Cast::fromPrimType(aOp(primTypePtr, primTypeVal));
  }

  static ValueType applyUnaryFunction(UnaryOp aOp, ValueType& aPtr)
  {
    PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
    return Cast::fromPrimType(aOp(primTypePtr));
  }
};

template<typename T>
struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
{
  typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
  typedef typename IntrinsicBase<T>::Primitives Primitives;

  static ValueType add(ValueType& aPtr, ValueType aVal)
  {
    return applyBinaryFunction(&Primitives::add, aPtr, aVal);
  }

  static ValueType sub(ValueType& aPtr, ValueType aVal)
  {
    return applyBinaryFunction(&Primitives::sub, aPtr, aVal);
  }
};

template<typename T>
struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
{
  typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
  typedef typename IntrinsicBase<T*>::Primitives Primitives;

  static ValueType add(ValueType& aPtr, ptrdiff_t aAmount)
  {
    return applyBinaryFunction(&Primitives::add, aPtr,
                               (ValueType)(aAmount * sizeof(T)));
  }

  static ValueType sub(ValueType& aPtr, ptrdiff_t aAmount)
  {
    return applyBinaryFunction(&Primitives::sub, aPtr,
                               (ValueType)(aAmount * sizeof(T)));
  }
};

template<typename T>
struct IntrinsicIncDec : public IntrinsicAddSub<T>
{
  typedef typename IntrinsicAddSub<T>::ValueType ValueType;
  static ValueType inc(ValueType& aPtr) { return add(aPtr, 1); }
  static ValueType dec(ValueType& aPtr) { return sub(aPtr, 1); }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T>
{
  typedef typename IntrinsicIncDec<T>::ValueType ValueType;
  typedef typename IntrinsicBase<T>::Primitives Primitives;

  static ValueType or_(ValueType& aPtr, T aVal)
  {
    return applyBinaryFunction(&Primitives::or_, aPtr, aVal);
  }

  static ValueType xor_(ValueType& aPtr, T aVal)
  {
    return applyBinaryFunction(&Primitives::xor_, aPtr, aVal);
  }

  static ValueType and_(ValueType& aPtr, T aVal)
  {
    return applyBinaryFunction(&Primitives::and_, aPtr, aVal);
  }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*>
{
  typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
  // This typedef is required to build with MSVC10, for unknown reasons.
  typedef typename IntrinsicBase<T*>::Primitives Primitives;
};

} // namespace detail
} // namespace mozilla

#else
# error "Atomic compiler intrinsics are not supported on your platform"
#endif

namespace mozilla {

namespace detail {

template<typename T, MemoryOrdering Order>
class AtomicBase
{
  // We only support 32-bit types on 32-bit Windows, which constrains our
  // implementation elsewhere.  But we support pointer-sized types everywhere.
  static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
                "mozilla/Atomics.h only supports 32-bit and pointer-sized types");

protected:
  typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
  typename Intrinsics::ValueType mValue;

public:
  MOZ_CONSTEXPR AtomicBase() : mValue() {}
  explicit MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}

  // Note: we can't provide operator T() here because Atomic<bool> inherits
  // from AtomicBase with T=uint32_t and not T=bool.  If we implemented
  // operator T() here, it would cause errors when comparing Atomic<bool> with
  // a regular bool.

  T operator=(T aVal)
  {
    Intrinsics::store(mValue, aVal);
    return aVal;
  }

  /**
   * Performs an atomic swap operation.  aVal is stored and the previous
   * value of this variable is returned.
   */
  T exchange(T aVal)
  {
    return Intrinsics::exchange(mValue, aVal);
  }

  /**
   * Performs an atomic compare-and-swap operation and returns true if it
   * succeeded.  This is equivalent to atomically doing
   *
   *   if (mValue == aOldValue) {
   *     mValue = aNewValue;
   *     return true;
   *   } else {
   *     return false;
   *   }
   */
  bool compareExchange(T aOldValue, T aNewValue)
  {
    return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
  }

private:
  template<MemoryOrdering AnyOrder>
  AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) = delete;
};

template<typename T, MemoryOrdering Order>
class AtomicBaseIncDec : public AtomicBase<T, Order>
{
  typedef typename detail::AtomicBase<T, Order> Base;

public:
  MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
  explicit MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}

  using Base::operator=;

  operator T() const { return Base::Intrinsics::load(Base::mValue); }
  T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
  T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
  T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
  T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }

private:
  template<MemoryOrdering AnyOrder>
  AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) = delete;
};

} // namespace detail

/**
 * A wrapper for a type that enforces that all memory accesses are atomic.
 *
 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 * its place.  Implementations for integral and pointer types are provided
 * below.
 *
 * Atomic accesses are sequentially consistent by default.  You should
 * use the default unless you are tall enough to ride the
 * memory-ordering roller coaster (if you're not sure, you aren't) and
 * you have a compelling reason to do otherwise.
 *
 * There is one exception to the case of atomic memory accesses: providing an
 * initial value of the atomic value is not guaranteed to be atomic.  This is a
 * deliberate design choice that enables static atomic variables to be declared
 * without introducing extra static constructors.
 */
template<typename T,
         MemoryOrdering Order = SequentiallyConsistent,
         typename Enable = void>
class Atomic;
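
/*
 * Illustrative usage sketch (assumes the integral specialization below; the
 * variable names are hypothetical):
 *
 *   static Atomic<uint32_t> sCount;                    // SequentiallyConsistent
 *   static Atomic<uint32_t, Relaxed> sStatOnlyCount;   // statistics only
 *
 *   void OnEvent()
 *   {
 *     sCount++;           // atomic read-modify-write, fully ordered
 *     sStatOnlyCount++;   // atomic, but with no ordering guarantees
 *   }
 */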

/**
 * Atomic<T> implementation for integral types.
 *
 * In addition to atomic store and load operations, compound assignment and
 * increment/decrement operators are implemented which perform the
 * corresponding read-modify-write operation atomically.  Finally, an atomic
 * swap method is provided.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value &&
                                         !IsSame<T, bool>::value>::Type>
  : public detail::AtomicBaseIncDec<T, Order>
{
  typedef typename detail::AtomicBaseIncDec<T, Order> Base;

public:
  MOZ_CONSTEXPR Atomic() : Base() {}
  explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}

  using Base::operator=;

  T operator+=(T aDelta)
  {
    return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
  }

  T operator-=(T aDelta)
  {
    return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
  }

  T operator|=(T aVal)
  {
    return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
  }

  T operator^=(T aVal)
  {
    return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
  }

  T operator&=(T aVal)
  {
    return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
  }

private:
  Atomic(Atomic<T, Order>& aOther) = delete;
};
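
/*
 * Illustrative sketch of the integral operators above (hypothetical values):
 *
 *   Atomic<int32_t> x(5);
 *   int32_t a = x++;       // a == 5, x == 6; post-increment returns old value
 *   int32_t b = ++x;       // b == 7, x == 7; pre-increment returns new value
 *   int32_t c = (x += 3);  // c == 10; compound assignment returns new value
 */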

/**
 * Atomic<T> implementation for pointer types.
 *
 * An atomic compare-and-swap primitive for pointer variables is provided, as
 * are atomic increment and decrement operators.  Also provided are the
 * compound assignment operators for addition and subtraction.  Atomic swap
 * (via exchange()) is included as well.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
{
  typedef typename detail::AtomicBaseIncDec<T*, Order> Base;

public:
  MOZ_CONSTEXPR Atomic() : Base() {}
  explicit MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}

  using Base::operator=;

  T* operator+=(ptrdiff_t aDelta)
  {
    return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
  }

  T* operator-=(ptrdiff_t aDelta)
  {
    return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
  }

private:
  Atomic(Atomic<T*, Order>& aOther) = delete;
};
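
/*
 * Illustrative sketch (hypothetical buffer): pointer arithmetic on Atomic<T*>
 * follows ordinary C++ pointer semantics, so the delta is measured in
 * elements, not bytes.
 *
 *   static int sBuffer[16];
 *   Atomic<int*> p(sBuffer);
 *   p += 4;    // p now points at sBuffer + 4, i.e. 4 * sizeof(int) bytes in
 */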

/**
 * Atomic<T> implementation for enum types.
 *
 * The atomic store and load operations and the atomic swap method are
 * provided.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
  : public detail::AtomicBase<T, Order>
{
  typedef typename detail::AtomicBase<T, Order> Base;

public:
  MOZ_CONSTEXPR Atomic() : Base() {}
  explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}

  operator T() const { return Base::Intrinsics::load(Base::mValue); }

  using Base::operator=;

private:
  Atomic(Atomic<T, Order>& aOther) = delete;
};
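
/*
 * Illustrative sketch (hypothetical enum):
 *
 *   enum State { eIdle, eRunning, eDone };
 *   static Atomic<State> sState(eIdle);
 *   sState = eRunning;                                       // atomic store
 *   bool ok = sState.compareExchange(eRunning, eDone);       // true here
 */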

/**
 * Atomic<T> implementation for boolean types.
 *
 * The atomic store and load operations and the atomic swap method are
 * provided.
 *
 * Note:
 *
 * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
 *   bool and/or some implementations of std::atomic.  This is allowed in
 *   [atomic.types.generic]p9.
 *
 * - It's not obvious whether the 8-bit atomic functions on Windows are always
 *   inlined or not.  If they are not inlined, the corresponding functions in
 *   the runtime library are not available on Windows XP.  This is why we
 *   implement Atomic<bool> with an underlying type of uint32_t.
 */
template<MemoryOrdering Order>
class Atomic<bool, Order>
  : protected detail::AtomicBase<uint32_t, Order>
{
  typedef typename detail::AtomicBase<uint32_t, Order> Base;

public:
  MOZ_CONSTEXPR Atomic() : Base() {}
  explicit MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {}

  // We provide boolean wrappers for the underlying AtomicBase methods.
  operator bool() const
  {
    return Base::Intrinsics::load(Base::mValue);
  }

  bool operator=(bool aVal)
  {
    return Base::operator=(aVal);
  }

  bool exchange(bool aVal)
  {
    return Base::exchange(aVal);
  }

  bool compareExchange(bool aOldValue, bool aNewValue)
  {
    return Base::compareExchange(aOldValue, aNewValue);
  }

private:
  Atomic(Atomic<bool, Order>& aOther) = delete;
};
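
/*
 * Illustrative sketch (hypothetical flag): a one-shot initialization guard.
 *
 *   static Atomic<bool> sInitialized(false);
 *
 *   void EnsureInitialized()
 *   {
 *     // Only the thread that flips false -> true runs the initialization.
 *     if (sInitialized.compareExchange(false, true)) {
 *       DoExpensiveInitialization();   // hypothetical helper
 *     }
 *   }
 */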

} // namespace mozilla

#endif /* mozilla_Atomics_h */