mfbt/Atomics.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * Implements (almost always) lock-free atomic operations. The operations here
   9  * are a subset of that which can be found in C++11's <atomic> header, with a
  10  * different API to enforce consistent memory ordering constraints.
  11  *
  12  * Anyone caught using |volatile| for inter-thread memory safety needs to be
  13  * sent a copy of this header and the C++11 standard.
  14  */
  15
  16 #ifndef mozilla_Atomics_h
  17 #define mozilla_Atomics_h
  18
  19 #include "mozilla/Assertions.h"
  20 #include "mozilla/Attributes.h"
  21 #include "mozilla/Compiler.h"
  22 #include "mozilla/TypeTraits.h"
  23
  24 #include <stdint.h>
  25
  26 /*
  27  * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
  28  * does not have <atomic>.  So be sure to check for <atomic> support
  29  * along with C++0x support.
  30  */
  31 #if defined(__clang__) || defined(__GNUC__)
  32    /*
  33     * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
   34     * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline
  35     * definitions for unspecialized std::atomic and causes linking errors.
  36     * Therefore, we require at least 4.7.0 for using libstdc++.
  37     */
  38 #  if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
  39 #    define MOZ_HAVE_CXX11_ATOMICS
  40 #  elif MOZ_USING_LIBCXX
  41 #    define MOZ_HAVE_CXX11_ATOMICS
  42 #  endif
  43 #elif defined(_MSC_VER) && _MSC_VER >= 1700
  44 #  if defined(DEBUG)
  45      /*
  46       * Provide our own failure code since we're having trouble linking to
  47       * std::_Debug_message (bug 982310).
  48       */
  49 #    define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order")
  50 #  endif
  51 #  define MOZ_HAVE_CXX11_ATOMICS
  52 #endif
  53
  54 namespace mozilla {
  55
  56 /**
  57  * An enum of memory ordering possibilities for atomics.
  58  *
  59  * Memory ordering is the observable state of distinct values in memory.
  60  * (It's a separate concept from atomicity, which concerns whether an
  61  * operation can ever be observed in an intermediate state.  Don't
  62  * conflate the two!)  Given a sequence of operations in source code on
  63  * memory, it is *not* always the case that, at all times and on all
  64  * cores, those operations will appear to have occurred in that exact
  65  * sequence.  First, the compiler might reorder that sequence, if it
  66  * thinks another ordering will be more efficient.  Second, the CPU may
  67  * not expose so consistent a view of memory.  CPUs will often perform
  68  * their own instruction reordering, above and beyond that performed by
  69  * the compiler.  And each core has its own memory caches, and accesses
  70  * (reads and writes both) to "memory" may only resolve to out-of-date
  71  * cache entries -- not to the "most recently" performed operation in
  72  * some global sense.  Any access to a value that may be used by
  73  * multiple threads, potentially across multiple cores, must therefore
  74  * have a memory ordering imposed on it, for all code on all
  75  * threads/cores to have a sufficiently coherent worldview.
  76  *
  77  * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
  78  * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
  79  * detail on all this, including examples of how each mode works.
  80  *
  81  * Note that for simplicity and practicality, not all of the modes in
  82  * C++11 are supported.  The missing C++11 modes are either subsumed by
  83  * the modes we provide below, or not relevant for the CPUs we support
  84  * in Gecko.  These three modes are confusing enough as it is!
  85  */
  86 enum MemoryOrdering {
  87   /*
  88    * Relaxed ordering is the simplest memory ordering: none at all.
  89    * When the result of a write is observed, nothing may be inferred
  90    * about other memory.  Writes ostensibly performed "before" on the
  91    * writing thread may not yet be visible.  Writes performed "after" on
  92    * the writing thread may already be visible, if the compiler or CPU
  93    * reordered them.  (The latter can happen if reads and/or writes get
  94    * held up in per-processor caches.)  Relaxed ordering means
  95    * operations can always use cached values (as long as the actual
  96    * updates to atomic values actually occur, correctly, eventually), so
  97    * it's usually the fastest sort of atomic access.  For this reason,
  98    * *it's also the most dangerous kind of access*.
  99    *
 100    * Relaxed ordering is good for things like process-wide statistics
 101    * counters that don't need to be consistent with anything else, so
 102    * long as updates themselves are atomic.  (And so long as any
 103    * observations of that value can tolerate being out-of-date -- if you
 104    * need some sort of up-to-date value, you need some sort of other
 105    * synchronizing operation.)  It's *not* good for locks, mutexes,
 106    * reference counts, etc. that mediate access to other memory, or must
 107    * be observably consistent with other memory.
 108    *
 109    * x86 architectures don't take advantage of the optimization
 110    * opportunities that relaxed ordering permits.  Thus it's possible
 111    * that using relaxed ordering will "work" on x86 but fail elsewhere
 112    * (ARM, say, which *does* implement non-sequentially-consistent
 113    * relaxed ordering semantics).  Be extra-careful using relaxed
 114    * ordering if you can't easily test non-x86 architectures!
 115    */
 116   Relaxed,
 117
 118   /*
 119    * When an atomic value is updated with ReleaseAcquire ordering, and
 120    * that new value is observed with ReleaseAcquire ordering, prior
 121    * writes (atomic or not) are also observable.  What ReleaseAcquire
 122    * *doesn't* give you is any observable ordering guarantees for
 123    * ReleaseAcquire-ordered operations on different objects.  For
 124    * example, if there are two cores that each perform ReleaseAcquire
 125    * operations on separate objects, each core may or may not observe
 126    * the operations made by the other core.  The only way the cores can
 127    * be synchronized with ReleaseAcquire is if they both
 128    * ReleaseAcquire-access the same object.  This implies that you can't
 129    * necessarily describe some global total ordering of ReleaseAcquire
 130    * operations.
 131    *
 132    * ReleaseAcquire ordering is good for (as the name implies) atomic
 133    * operations on values controlling ownership of things: reference
 134    * counts, mutexes, and the like.  However, if you are thinking about
 135    * using these to implement your own locks or mutexes, you should take
 136    * a good, hard look at actual lock or mutex primitives first.
 137    */
 138   ReleaseAcquire,
 139
 140   /*
 141    * When an atomic value is updated with SequentiallyConsistent
 142    * ordering, all writes observable when the update is observed, just
 143    * as with ReleaseAcquire ordering.  But, furthermore, a global total
 144    * ordering of SequentiallyConsistent operations *can* be described.
 145    * For example, if two cores perform SequentiallyConsistent operations
 146    * on separate objects, one core will observably perform its update
 147    * (and all previous operations will have completed), then the other
 148    * core will observably perform its update (and all previous
 149    * operations will have completed).  (Although those previous
 150    * operations aren't themselves ordered -- they could be intermixed,
 151    * or ordered if they occur on atomic values with ordering
 152    * requirements.)  SequentiallyConsistent is the *simplest and safest*
 153    * ordering of atomic operations -- it's always as if one operation
 154    * happens, then another, then another, in some order -- and every
 155    * core observes updates to happen in that single order.  Because it
 156    * has the most synchronization requirements, operations ordered this
 157    * way also tend to be slowest.
 158    *
 159    * SequentiallyConsistent ordering can be desirable when multiple
 160    * threads observe objects, and they all have to agree on the
 161    * observable order of changes to them.  People expect
 162    * SequentiallyConsistent ordering, even if they shouldn't, when
 163    * writing code, atomic or otherwise.  SequentiallyConsistent is also
 164    * the ordering of choice when designing lockless data structures.  If
 165    * you don't know what order to use, use this one.
 166    */
 167   SequentiallyConsistent,
 168 };
 169
 170 } // namespace mozilla
 171
 172 // Build up the underlying intrinsics.
 173 #ifdef MOZ_HAVE_CXX11_ATOMICS
 174
 175 #  include <atomic>
 176
 177 namespace mozilla {
 178 namespace detail {
 179
 180 /*
 181  * We provide CompareExchangeFailureOrder to work around a bug in some
 182  * versions of GCC's <atomic> header.  See bug 898491.
 183  */
 184 template<MemoryOrdering Order> struct AtomicOrderConstraints;
 185
 186 template<>
 187 struct AtomicOrderConstraints<Relaxed>
 188 {
 189   static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
 190   static const std::memory_order LoadOrder = std::memory_order_relaxed;
 191   static const std::memory_order StoreOrder = std::memory_order_relaxed;
 192   static const std::memory_order CompareExchangeFailureOrder =
 193     std::memory_order_relaxed;
 194 };
 195
 196 template<>
 197 struct AtomicOrderConstraints<ReleaseAcquire>
 198 {
 199   static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
 200   static const std::memory_order LoadOrder = std::memory_order_acquire;
 201   static const std::memory_order StoreOrder = std::memory_order_release;
 202   static const std::memory_order CompareExchangeFailureOrder =
 203     std::memory_order_acquire;
 204 };
 205
 206 template<>
 207 struct AtomicOrderConstraints<SequentiallyConsistent>
 208 {
 209   static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
 210   static const std::memory_order LoadOrder = std::memory_order_seq_cst;
 211   static const std::memory_order StoreOrder = std::memory_order_seq_cst;
 212   static const std::memory_order CompareExchangeFailureOrder =
 213     std::memory_order_seq_cst;
 214 };
 215
 216 template<typename T, MemoryOrdering Order>
 217 struct IntrinsicBase
 218 {
 219   typedef std::atomic<T> ValueType;
 220   typedef AtomicOrderConstraints<Order> OrderedOp;
 221 };
 222
 223 template<typename T, MemoryOrdering Order>
 224 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
 225 {
 226   typedef IntrinsicBase<T, Order> Base;
 227
 228   static T load(const typename Base::ValueType& aPtr)
 229   {
 230     return aPtr.load(Base::OrderedOp::LoadOrder);
 231   }
 232
 233   static void store(typename Base::ValueType& aPtr, T aVal)
 234   {
 235     aPtr.store(aVal, Base::OrderedOp::StoreOrder);
 236   }
 237
 238   static T exchange(typename Base::ValueType& aPtr, T aVal)
 239   {
 240     return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
 241   }
 242
 243   static bool compareExchange(typename Base::ValueType& aPtr,
 244                               T aOldVal, T aNewVal)
 245   {
 246     return aPtr.compare_exchange_strong(aOldVal, aNewVal,
 247                                         Base::OrderedOp::AtomicRMWOrder,
 248                                         Base::OrderedOp::CompareExchangeFailureOrder);
 249   }
 250 };
 251
 252 template<typename T, MemoryOrdering Order>
 253 struct IntrinsicAddSub : public IntrinsicBase<T, Order>
 254 {
 255   typedef IntrinsicBase<T, Order> Base;
 256
 257   static T add(typename Base::ValueType& aPtr, T aVal)
 258   {
 259     return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
 260   }
 261
 262   static T sub(typename Base::ValueType& aPtr, T aVal)
 263   {
 264     return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
 265   }
 266 };
 267
 268 template<typename T, MemoryOrdering Order>
 269 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
 270 {
 271   typedef IntrinsicBase<T*, Order> Base;
 272
 273   static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal)
 274   {
 275     return aPtr.fetch_add(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
 276   }
 277
 278   static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal)
 279   {
 280     return aPtr.fetch_sub(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
 281   }
 282 private:
 283   /*
 284    * GCC 4.6's <atomic> header has a bug where adding X to an
 285    * atomic<T*> is not the same as adding X to a T*.  Hence the need
 286    * for this function to provide the correct addend.
 287    */
 288   static ptrdiff_t fixupAddend(ptrdiff_t aVal)
 289   {
 290 #if defined(__clang__) || defined(_MSC_VER)
 291     return aVal;
 292 #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
 293     !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
 294     return aVal * sizeof(T);
 295 #else
 296     return aVal;
 297 #endif
 298   }
 299 };
 300
 301 template<typename T, MemoryOrdering Order>
 302 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
 303 {
 304   typedef IntrinsicBase<T, Order> Base;
 305
 306   static T inc(typename Base::ValueType& aPtr)
 307   {
 308     return IntrinsicAddSub<T, Order>::add(aPtr, 1);
 309   }
 310
 311   static T dec(typename Base::ValueType& aPtr)
 312   {
 313     return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
 314   }
 315 };
 316
 317 template<typename T, MemoryOrdering Order>
 318 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 319                           public IntrinsicIncDec<T, Order>
 320 {
 321   typedef IntrinsicBase<T, Order> Base;
 322
 323   static T or_(typename Base::ValueType& aPtr, T aVal)
 324   {
 325     return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
 326   }
 327
 328   static T xor_(typename Base::ValueType& aPtr, T aVal)
 329   {
 330     return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
 331   }
 332
 333   static T and_(typename Base::ValueType& aPtr, T aVal)
 334   {
 335     return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
 336   }
 337 };
 338
 339 template<typename T, MemoryOrdering Order>
 340 struct AtomicIntrinsics<T*, Order>
 341   : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
 342 {
 343 };
 344
 345 } // namespace detail
 346 } // namespace mozilla
 347
 348 #elif defined(__GNUC__)
 349
 350 namespace mozilla {
 351 namespace detail {
 352
 353 /*
 354  * The __sync_* family of intrinsics is documented here:
 355  *
 356  * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
 357  *
 358  * While these intrinsics are deprecated in favor of the newer __atomic_*
  359  * family of intrinsics:
 360  *
 361  * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
 362  *
 363  * any GCC version that supports the __atomic_* intrinsics will also support
 364  * the <atomic> header and so will be handled above.  We provide a version of
 365  * atomics using the __sync_* intrinsics to support older versions of GCC.
 366  *
 367  * All __sync_* intrinsics that we use below act as full memory barriers, for
 368  * both compiler and hardware reordering, except for __sync_lock_test_and_set,
  369  * which is only an acquire barrier.  When we call __sync_lock_test_and_set,
 370  * we add a barrier above it as appropriate.
 371  */
 372
 373 template<MemoryOrdering Order> struct Barrier;
 374
 375 /*
 376  * Some processors (in particular, x86) don't require quite so many calls to
  377  * __sync_synchronize as our specializations of Barrier produce.  If
 378  * performance turns out to be an issue, defining these specializations
 379  * on a per-processor basis would be a good first tuning step.
 380  */
 381
 382 template<>
 383 struct Barrier<Relaxed>
 384 {
 385   static void beforeLoad() {}
 386   static void afterLoad() {}
 387   static void beforeStore() {}
 388   static void afterStore() {}
 389 };
 390
 391 template<>
 392 struct Barrier<ReleaseAcquire>
 393 {
 394   static void beforeLoad() {}
 395   static void afterLoad() { __sync_synchronize(); }
 396   static void beforeStore() { __sync_synchronize(); }
 397   static void afterStore() {}
 398 };
 399
 400 template<>
 401 struct Barrier<SequentiallyConsistent>
 402 {
 403   static void beforeLoad() { __sync_synchronize(); }
 404   static void afterLoad() { __sync_synchronize(); }
 405   static void beforeStore() { __sync_synchronize(); }
 406   static void afterStore() { __sync_synchronize(); }
 407 };
 408
 409 template<typename T, MemoryOrdering Order>
 410 struct IntrinsicMemoryOps
 411 {
 412   static T load(const T& aPtr)
 413   {
 414     Barrier<Order>::beforeLoad();
 415     T val = aPtr;
 416     Barrier<Order>::afterLoad();
 417     return val;
 418   }
 419
 420   static void store(T& aPtr, T aVal)
 421   {
 422     Barrier<Order>::beforeStore();
 423     aPtr = aVal;
 424     Barrier<Order>::afterStore();
 425   }
 426
 427   static T exchange(T& aPtr, T aVal)
 428   {
 429     // __sync_lock_test_and_set is only an acquire barrier; loads and stores
 430     // can't be moved up from after to before it, but they can be moved down
 431     // from before to after it.  We may want a stricter ordering, so we need
 432     // an explicit barrier.
 433     Barrier<Order>::beforeStore();
 434     return __sync_lock_test_and_set(&aPtr, aVal);
 435   }
 436
 437   static bool compareExchange(T& aPtr, T aOldVal, T aNewVal)
 438   {
 439     return __sync_bool_compare_and_swap(&aPtr, aOldVal, aNewVal);
 440   }
 441 };
 442
 443 template<typename T>
 444 struct IntrinsicAddSub
 445 {
 446   typedef T ValueType;
 447
 448   static T add(T& aPtr, T aVal)
 449   {
 450     return __sync_fetch_and_add(&aPtr, aVal);
 451   }
 452
 453   static T sub(T& aPtr, T aVal)
 454   {
 455     return __sync_fetch_and_sub(&aPtr, aVal);
 456   }
 457 };
 458
 459 template<typename T>
 460 struct IntrinsicAddSub<T*>
 461 {
 462   typedef T* ValueType;
 463
 464   /*
 465    * The reinterpret_casts are needed so that
 466    * __sync_fetch_and_{add,sub} will properly type-check.
 467    *
 468    * Also, these functions do not provide standard semantics for
 469    * pointer types, so we need to adjust the addend.
 470    */
 471   static ValueType add(ValueType& aPtr, ptrdiff_t aVal)
 472   {
 473     ValueType amount = reinterpret_cast<ValueType>(aVal * sizeof(T));
 474     return __sync_fetch_and_add(&aPtr, amount);
 475   }
 476
 477   static ValueType sub(ValueType& aPtr, ptrdiff_t aVal)
 478   {
 479     ValueType amount = reinterpret_cast<ValueType>(aVal * sizeof(T));
 480     return __sync_fetch_and_sub(&aPtr, amount);
 481   }
 482 };
 483
 484 template<typename T>
 485 struct IntrinsicIncDec : public IntrinsicAddSub<T>
 486 {
 487   static T inc(T& aPtr) { return IntrinsicAddSub<T>::add(aPtr, 1); }
 488   static T dec(T& aPtr) { return IntrinsicAddSub<T>::sub(aPtr, 1); }
 489 };
 490
 491 template<typename T, MemoryOrdering Order>
 492 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 493                           public IntrinsicIncDec<T>
 494 {
 495   static T or_( T& aPtr, T aVal) { return __sync_fetch_and_or(&aPtr, aVal); }
 496   static T xor_(T& aPtr, T aVal) { return __sync_fetch_and_xor(&aPtr, aVal); }
 497   static T and_(T& aPtr, T aVal) { return __sync_fetch_and_and(&aPtr, aVal); }
 498 };
 499
 500 template<typename T, MemoryOrdering Order>
 501 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
 502                                      public IntrinsicIncDec<T*>
 503 {
 504 };
 505
 506 } // namespace detail
 507 } // namespace mozilla
 508
 509 #elif defined(_MSC_VER)
 510
 511 /*
 512  * Windows comes with a full complement of atomic operations.
 513  * Unfortunately, most of those aren't available for Windows XP (even if
 514  * the compiler supports intrinsics for them), which is the oldest
 515  * version of Windows we support.  Therefore, we only provide operations
 516  * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
 517  * versions, we support 64-bit datatypes as well.
 518  */
 519
 520 #  include <intrin.h>
 521
 522 #  pragma intrinsic(_InterlockedExchangeAdd)
 523 #  pragma intrinsic(_InterlockedOr)
 524 #  pragma intrinsic(_InterlockedXor)
 525 #  pragma intrinsic(_InterlockedAnd)
 526 #  pragma intrinsic(_InterlockedExchange)
 527 #  pragma intrinsic(_InterlockedCompareExchange)
 528
 529 namespace mozilla {
 530 namespace detail {
 531
 532 #  if !defined(_M_IX86) && !defined(_M_X64)
 533      /*
 534       * The implementations below are optimized for x86ish systems.  You
 535       * will have to modify them if you are porting to Windows on a
 536       * different architecture.
 537       */
 538 #    error "Unknown CPU type"
 539 #  endif
 540
 541 /*
 542  * The PrimitiveIntrinsics template should define |Type|, the datatype of size
 543  * DataSize upon which we operate, and the following eight functions.
 544  *
 545  * static Type add(Type* aPtr, Type aVal);
 546  * static Type sub(Type* aPtr, Type aVal);
 547  * static Type or_(Type* aPtr, Type aVal);
 548  * static Type xor_(Type* aPtr, Type aVal);
 549  * static Type and_(Type* aPtr, Type aVal);
 550  *
 551  *   These functions perform the obvious operation on the value contained in
 552  *   |*aPtr| combined with |aVal| and return the value previously stored in
 553  *   |*aPtr|.
 554  *
 555  * static void store(Type* aPtr, Type aVal);
 556  *
 557  *   This function atomically stores |aVal| into |*aPtr| and must provide a full
 558  *   memory fence after the store to prevent compiler and hardware instruction
 559  *   reordering.  It should also act as a compiler barrier to prevent reads and
 560  *   writes from moving to after the store.
 561  *
 562  * static Type exchange(Type* aPtr, Type aVal);
 563  *
 564  *   This function atomically stores |aVal| into |*aPtr| and returns the
  565  *   previous contents of |*aPtr|.
 566  *
 567  * static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal);
 568  *
 569  *   This function atomically performs the following operation:
 570  *
 571  *     if (*aPtr == aOldVal) {
 572  *       *aPtr = aNewVal;
 573  *       return true;
 574  *     } else {
 575  *       return false;
 576  *     }
 577  *
 578  */
 579 template<size_t DataSize> struct PrimitiveIntrinsics;
 580
 581 template<>
 582 struct PrimitiveIntrinsics<4>
 583 {
 584   typedef long Type;
 585
 586   static Type add(Type* aPtr, Type aVal)
 587   {
 588     return _InterlockedExchangeAdd(aPtr, aVal);
 589   }
 590
 591   static Type sub(Type* aPtr, Type aVal)
 592   {
 593     /*
 594      * _InterlockedExchangeSubtract isn't available before Windows 7,
 595      * and we must support Windows XP.
 596      */
 597     return _InterlockedExchangeAdd(aPtr, -aVal);
 598   }
 599
 600   static Type or_(Type* aPtr, Type aVal)
 601   {
 602     return _InterlockedOr(aPtr, aVal);
 603   }
 604
 605   static Type xor_(Type* aPtr, Type aVal)
 606   {
 607     return _InterlockedXor(aPtr, aVal);
 608   }
 609
 610   static Type and_(Type* aPtr, Type aVal)
 611   {
 612     return _InterlockedAnd(aPtr, aVal);
 613   }
 614
 615   static void store(Type* aPtr, Type aVal)
 616   {
 617     _InterlockedExchange(aPtr, aVal);
 618   }
 619
 620   static Type exchange(Type* aPtr, Type aVal)
 621   {
 622     return _InterlockedExchange(aPtr, aVal);
 623   }
 624
 625   static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
 626   {
 627     return _InterlockedCompareExchange(aPtr, aNewVal, aOldVal) == aOldVal;
 628   }
 629 };
 630
 631 #  if defined(_M_X64)
 632
 633 #    pragma intrinsic(_InterlockedExchangeAdd64)
 634 #    pragma intrinsic(_InterlockedOr64)
 635 #    pragma intrinsic(_InterlockedXor64)
 636 #    pragma intrinsic(_InterlockedAnd64)
 637 #    pragma intrinsic(_InterlockedExchange64)
 638 #    pragma intrinsic(_InterlockedCompareExchange64)
 639
 640 template <>
 641 struct PrimitiveIntrinsics<8>
 642 {
 643   typedef __int64 Type;
 644
 645   static Type add(Type* aPtr, Type aVal)
 646   {
 647     return _InterlockedExchangeAdd64(aPtr, aVal);
 648   }
 649
 650   static Type sub(Type* aPtr, Type aVal)
 651   {
 652     /*
 653      * There is no _InterlockedExchangeSubtract64.
 654      */
 655     return _InterlockedExchangeAdd64(aPtr, -aVal);
 656   }
 657
 658   static Type or_(Type* aPtr, Type aVal)
 659   {
 660     return _InterlockedOr64(aPtr, aVal);
 661   }
 662
 663   static Type xor_(Type* aPtr, Type aVal)
 664   {
 665     return _InterlockedXor64(aPtr, aVal);
 666   }
 667
 668   static Type and_(Type* aPtr, Type aVal)
 669   {
 670     return _InterlockedAnd64(aPtr, aVal);
 671   }
 672
 673   static void store(Type* aPtr, Type aVal)
 674   {
 675     _InterlockedExchange64(aPtr, aVal);
 676   }
 677
 678   static Type exchange(Type* aPtr, Type aVal)
 679   {
 680     return _InterlockedExchange64(aPtr, aVal);
 681   }
 682
 683   static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
 684   {
 685     return _InterlockedCompareExchange64(aPtr, aNewVal, aOldVal) == aOldVal;
 686   }
 687 };
 688
 689 #  endif
 690
 691 #  pragma intrinsic(_ReadWriteBarrier)
 692
 693 template<MemoryOrdering Order> struct Barrier;
 694
 695 /*
 696  * We do not provide an afterStore method in Barrier, as Relaxed and
 697  * ReleaseAcquire orderings do not require one, and the required barrier
 698  * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
 699  */
 700
 701 template<>
 702 struct Barrier<Relaxed>
 703 {
 704   static void beforeLoad() {}
 705   static void afterLoad() {}
 706   static void beforeStore() {}
 707 };
 708
 709 template<>
 710 struct Barrier<ReleaseAcquire>
 711 {
 712   static void beforeLoad() {}
 713   static void afterLoad() { _ReadWriteBarrier(); }
 714   static void beforeStore() { _ReadWriteBarrier(); }
 715 };
 716
 717 template<>
 718 struct Barrier<SequentiallyConsistent>
 719 {
 720   static void beforeLoad() { _ReadWriteBarrier(); }
 721   static void afterLoad() { _ReadWriteBarrier(); }
 722   static void beforeStore() { _ReadWriteBarrier(); }
 723 };
 724
 725 template<typename PrimType, typename T>
 726 struct CastHelper
 727 {
 728   static PrimType toPrimType(T aVal) { return static_cast<PrimType>(aVal); }
 729   static T fromPrimType(PrimType aVal) { return static_cast<T>(aVal); }
 730 };
 731
 732 template<typename PrimType, typename T>
 733 struct CastHelper<PrimType, T*>
 734 {
 735   static PrimType toPrimType(T* aVal) { return reinterpret_cast<PrimType>(aVal); }
 736   static T* fromPrimType(PrimType aVal) { return reinterpret_cast<T*>(aVal); }
 737 };
 738
 739 template<typename T>
 740 struct IntrinsicBase
 741 {
 742   typedef T ValueType;
 743   typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
 744   typedef typename Primitives::Type PrimType;
 745   static_assert(sizeof(PrimType) == sizeof(T),
 746                 "Selection of PrimitiveIntrinsics was wrong");
 747   typedef CastHelper<PrimType, T> Cast;
 748 };
 749
 750 template<typename T, MemoryOrdering Order>
 751 struct IntrinsicMemoryOps : public IntrinsicBase<T>
 752 {
 753   typedef typename IntrinsicBase<T>::ValueType ValueType;
 754   typedef typename IntrinsicBase<T>::Primitives Primitives;
 755   typedef typename IntrinsicBase<T>::PrimType PrimType;
 756   typedef typename IntrinsicBase<T>::Cast Cast;
 757
 758   static ValueType load(const ValueType& aPtr)
 759   {
 760     Barrier<Order>::beforeLoad();
 761     ValueType val = aPtr;
 762     Barrier<Order>::afterLoad();
 763     return val;
 764   }
 765
 766   static void store(ValueType& aPtr, ValueType aVal)
 767   {
 768     // For SequentiallyConsistent, Primitives::store() will generate the
 769     // proper memory fence.  Everything else just needs a barrier before
 770     // the store.
 771     if (Order == SequentiallyConsistent) {
 772       Primitives::store(reinterpret_cast<PrimType*>(&aPtr),
 773                         Cast::toPrimType(aVal));
 774     } else {
 775       Barrier<Order>::beforeStore();
 776       aPtr = aVal;
 777     }
 778   }
 779
 780   static ValueType exchange(ValueType& aPtr, ValueType aVal)
 781   {
 782     PrimType oldval =
 783       Primitives::exchange(reinterpret_cast<PrimType*>(&aPtr),
 784                            Cast::toPrimType(aVal));
 785     return Cast::fromPrimType(oldval);
 786   }
 787
 788   static bool compareExchange(ValueType& aPtr, ValueType aOldVal,
 789                               ValueType aNewVal)
 790   {
 791     return Primitives::compareExchange(reinterpret_cast<PrimType*>(&aPtr),
 792                                        Cast::toPrimType(aOldVal),
 793                                        Cast::toPrimType(aNewVal));
 794   }
 795 };
 796
 797 template<typename T>
 798 struct IntrinsicApplyHelper : public IntrinsicBase<T>
 799 {
 800   typedef typename IntrinsicBase<T>::ValueType ValueType;
 801   typedef typename IntrinsicBase<T>::PrimType PrimType;
 802   typedef typename IntrinsicBase<T>::Cast Cast;
 803   typedef PrimType (*BinaryOp)(PrimType*, PrimType);
 804   typedef PrimType (*UnaryOp)(PrimType*);
 805
 806   static ValueType applyBinaryFunction(BinaryOp aOp, ValueType& aPtr,
 807                                        ValueType aVal)
 808   {
 809     PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
 810     PrimType primTypeVal = Cast::toPrimType(aVal);
 811     return Cast::fromPrimType(aOp(primTypePtr, primTypeVal));
 812   }
 813
 814   static ValueType applyUnaryFunction(UnaryOp aOp, ValueType& aPtr)
 815   {
 816     PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
 817     return Cast::fromPrimType(aOp(primTypePtr));
 818   }
 819 };
 820
 821 template<typename T>
 822 struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
 823 {
 824   typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
 825   typedef typename IntrinsicBase<T>::Primitives Primitives;
 826
 827   static ValueType add(ValueType& aPtr, ValueType aVal)
 828   {
 829     return applyBinaryFunction(&Primitives::add, aPtr, aVal);
 830   }
 831
 832   static ValueType sub(ValueType& aPtr, ValueType aVal)
 833   {
 834     return applyBinaryFunction(&Primitives::sub, aPtr, aVal);
 835   }
 836 };
 837
 838 template<typename T>
 839 struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
 840 {
 841   typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
 842   typedef typename IntrinsicBase<T*>::Primitives Primitives;
 843
 844   static ValueType add(ValueType& aPtr, ptrdiff_t aAmount)
 845   {
 846     return applyBinaryFunction(&Primitives::add, aPtr,
 847                                (ValueType)(aAmount * sizeof(T)));
 848   }
 849
 850   static ValueType sub(ValueType& aPtr, ptrdiff_t aAmount)
 851   {
 852     return applyBinaryFunction(&Primitives::sub, aPtr,
 853                                (ValueType)(aAmount * sizeof(T)));
 854   }
 855 };
 856
 857 template<typename T>
 858 struct IntrinsicIncDec : public IntrinsicAddSub<T>
 859 {
 860   typedef typename IntrinsicAddSub<T>::ValueType ValueType;
 861   static ValueType inc(ValueType& aPtr) { return add(aPtr, 1); }
 862   static ValueType dec(ValueType& aPtr) { return sub(aPtr, 1); }
 863 };
 864
 865 template<typename T, MemoryOrdering Order>
 866 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 867                           public IntrinsicIncDec<T>
 868 {
 869   typedef typename IntrinsicIncDec<T>::ValueType ValueType;
 870   typedef typename IntrinsicBase<T>::Primitives Primitives;
 871
 872   static ValueType or_(ValueType& aPtr, T aVal)
 873   {
 874     return applyBinaryFunction(&Primitives::or_, aPtr, aVal);
 875   }
 876
 877   static ValueType xor_(ValueType& aPtr, T aVal)
 878   {
 879     return applyBinaryFunction(&Primitives::xor_, aPtr, aVal);
 880   }
 881
 882   static ValueType and_(ValueType& aPtr, T aVal)
 883   {
 884     return applyBinaryFunction(&Primitives::and_, aPtr, aVal);
 885   }
 886 };
 887
 888 template<typename T, MemoryOrdering Order>
 889 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
 890                                      public IntrinsicIncDec<T*>
 891 {
 892   typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
 893   // This is required to make us be able to build with MSVC10, for unknown
 894   // reasons.
 895   typedef typename IntrinsicBase<T*>::Primitives Primitives;
 896 };
 897
 898 } // namespace detail
 899 } // namespace mozilla
 900
 901 #else
 902 # error "Atomic compiler intrinsics are not supported on your platform"
 903 #endif
 904
 905 namespace mozilla {
 906
 907 namespace detail {
 908
 909 template<typename T, MemoryOrdering Order>
 910 class AtomicBase
 911 {
 912   // We only support 32-bit types on 32-bit Windows, which constrains our
 913   // implementation elsewhere.  But we support pointer-sized types everywhere.
 914   static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
 915                 "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
 916
 917 protected:
 918   typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
 919   typename Intrinsics::ValueType mValue;
 920
 921 public:
 922   MOZ_CONSTEXPR AtomicBase() : mValue() {}
 923   explicit MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}
 924
 925   // Note: we can't provide operator T() here because Atomic<bool> inherits
 926   // from AtomcBase with T=uint32_t and not T=bool. If we implemented
 927   // operator T() here, it would cause errors when comparing Atomic<bool> with
 928   // a regular bool.
 929
 930   T operator=(T aVal)
 931   {
 932     Intrinsics::store(mValue, aVal);
 933     return aVal;
 934   }
 935
 936   /**
 937    * Performs an atomic swap operation.  aVal is stored and the previous
 938    * value of this variable is returned.
 939    */
 940   T exchange(T aVal)
 941   {
 942     return Intrinsics::exchange(mValue, aVal);
 943   }
 944
 945   /**
 946    * Performs an atomic compare-and-swap operation and returns true if it
 947    * succeeded. This is equivalent to atomically doing
 948    *
 949    *   if (mValue == aOldValue) {
 950    *     mValue = aNewValue;
 951    *     return true;
 952    *   } else {
 953    *     return false;
 954    *   }
 955    */
 956   bool compareExchange(T aOldValue, T aNewValue)
 957   {
 958     return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
 959   }
 960
 961 private:
 962   template<MemoryOrdering AnyOrder>
 963   AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
 964 };
 965
 966 template<typename T, MemoryOrdering Order>
 967 class AtomicBaseIncDec : public AtomicBase<T, Order>
 968 {
 969   typedef typename detail::AtomicBase<T, Order> Base;
 970
 971 public:
 972   MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
 973   explicit MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}
 974
 975   using Base::operator=;
 976
 977   operator T() const { return Base::Intrinsics::load(Base::mValue); }
 978   T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
 979   T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
 980   T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
 981   T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
 982
 983 private:
 984   template<MemoryOrdering AnyOrder>
 985   AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
 986 };
 987
 988 } // namespace detail
 989
 990 /**
 991  * A wrapper for a type that enforces that all memory accesses are atomic.
 992  *
 993  * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 994  * its place.  Implementations for integral and pointer types are provided
 995  * below.
 996  *
 997  * Atomic accesses are sequentially consistent by default.  You should
 998  * use the default unless you are tall enough to ride the
 999  * memory-ordering roller coaster (if you're not sure, you aren't) and
1000  * you have a compelling reason to do otherwise.
1001  *
 * There is one exception to the rule that all memory accesses are atomic:
 * initializing the variable with its initial value is not guaranteed to be
 * atomic.  This is a deliberate design choice that enables static atomic
 * variables to be declared without introducing extra static constructors.
1006  */
1007 template<typename T,
1008          MemoryOrdering Order = SequentiallyConsistent,
1009          typename Enable = void>
1010 class Atomic;
1011
1012 /**
1013  * Atomic<T> implementation for integral types.
1014  *
1015  * In addition to atomic store and load operations, compound assignment and
1016  * increment/decrement operators are implemented which perform the
1017  * corresponding read-modify-write operation atomically.  Finally, an atomic
1018  * swap method is provided.
1019  */
1020 template<typename T, MemoryOrdering Order>
1021 class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value &&
1022                        !IsSame<T, bool>::value>::Type>
1023   : public detail::AtomicBaseIncDec<T, Order>
1024 {
1025   typedef typename detail::AtomicBaseIncDec<T, Order> Base;
1026
1027 public:
1028   MOZ_CONSTEXPR Atomic() : Base() {}
1029   explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1030
1031   using Base::operator=;
1032
1033   T operator+=(T aDelta)
1034   {
1035     return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
1036   }
1037
1038   T operator-=(T aDelta)
1039   {
1040     return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
1041   }
1042
1043   T operator|=(T aVal)
1044   {
1045     return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
1046   }
1047
1048   T operator^=(T aVal)
1049   {
1050     return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
1051   }
1052
1053   T operator&=(T aVal)
1054   {
1055     return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
1056   }
1057
1058 private:
1059   Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1060 };
1061
1062 /**
1063  * Atomic<T> implementation for pointer types.
1064  *
1065  * An atomic compare-and-swap primitive for pointer variables is provided, as
1066  * are atomic increment and decement operators.  Also provided are the compound
1067  * assignment operators for addition and subtraction. Atomic swap (via
1068  * exchange()) is included as well.
1069  */
1070 template<typename T, MemoryOrdering Order>
1071 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
1072 {
1073   typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
1074
1075 public:
1076   MOZ_CONSTEXPR Atomic() : Base() {}
1077   explicit MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}
1078
1079   using Base::operator=;
1080
1081   T* operator+=(ptrdiff_t aDelta)
1082   {
1083     return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
1084   }
1085
1086   T* operator-=(ptrdiff_t aDelta)
1087   {
1088     return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
1089   }
1090
1091 private:
1092   Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
1093 };
1094
1095 /**
1096  * Atomic<T> implementation for enum types.
1097  *
1098  * The atomic store and load operations and the atomic swap method is provided.
1099  */
1100 template<typename T, MemoryOrdering Order>
1101 class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
1102   : public detail::AtomicBase<T, Order>
1103 {
1104   typedef typename detail::AtomicBase<T, Order> Base;
1105
1106 public:
1107   MOZ_CONSTEXPR Atomic() : Base() {}
1108   explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1109
1110   operator T() const { return Base::Intrinsics::load(Base::mValue); }
1111
1112   using Base::operator=;
1113
1114 private:
1115   Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1116 };
1117
1118 /**
1119  * Atomic<T> implementation for boolean types.
1120  *
1121  * The atomic store and load operations and the atomic swap method is provided.
1122  *
1123  * Note:
1124  *
1125  * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
1126  *   bool and/or some implementations of std::atomic. This is allowed in
1127  *   [atomic.types.generic]p9.
1128  *
1129  * - It's not obvious whether the 8-bit atomic functions on Windows are always
1130  *   inlined or not. If they are not inlined, the corresponding functions in the
1131  *   runtime library are not available on Windows XP. This is why we implement
1132  *   Atomic<bool> with an underlying type of uint32_t.
1133  */
1134 template<MemoryOrdering Order>
1135 class Atomic<bool, Order>
1136   : protected detail::AtomicBase<uint32_t, Order>
1137 {
1138   typedef typename detail::AtomicBase<uint32_t, Order> Base;
1139
1140 public:
1141   MOZ_CONSTEXPR Atomic() : Base() {}
1142   explicit MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {}
1143
1144   // We provide boolean wrappers for the underlying AtomicBase methods.
1145   operator bool() const
1146   {
1147     return Base::Intrinsics::load(Base::mValue);
1148   }
1149
1150   bool operator=(bool aVal)
1151   {
1152     return Base::operator=(aVal);
1153   }
1154
1155   bool exchange(bool aVal)
1156   {
1157     return Base::exchange(aVal);
1158   }
1159
1160   bool compareExchange(bool aOldValue, bool aNewValue)
1161   {
1162     return Base::compareExchange(aOldValue, aNewValue);
1163   }
1164
1165 private:
1166   Atomic(Atomic<bool, Order>& aOther) MOZ_DELETE;
1167 };
1168
1169 } // namespace mozilla
1170
1171 #endif /* mozilla_Atomics_h */