mfbt/Atomics.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * Implements (almost always) lock-free atomic operations. The operations here
   9  * are a subset of that which can be found in C++11's <atomic> header, with a
  10  * different API to enforce consistent memory ordering constraints.
  11  *
  12  * Anyone caught using |volatile| for inter-thread memory safety needs to be
  13  * sent a copy of this header and the C++11 standard.
  14  */
  15
  16 #ifndef mozilla_Atomics_h
  17 #define mozilla_Atomics_h
  18
  19 #include "mozilla/Assertions.h"
  20 #include "mozilla/Attributes.h"
  21 #include "mozilla/Compiler.h"
  22 #include "mozilla/TypeTraits.h"
  23
  24 #include <stdint.h>
  25
  26 /*
  27  * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
  28  * does not have <atomic>.  So be sure to check for <atomic> support
  29  * along with C++0x support.
  30  */
  31 #if defined(__clang__) || defined(__GNUC__)
  32    /*
  33     * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
  34     * loose typing of the atomic builtins. GCC 4.5 and 4.6 lacks inline
  35     * definitions for unspecialized std::atomic and causes linking errors.
  36     * Therefore, we require at least 4.7.0 for using libstdc++.
  37     */
  38 #  if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
  39 #    define MOZ_HAVE_CXX11_ATOMICS
  40 #  elif MOZ_USING_LIBCXX
  41 #    define MOZ_HAVE_CXX11_ATOMICS
  42 #  endif
  43 #elif defined(_MSC_VER) && _MSC_VER >= 1700
  44 #  define MOZ_HAVE_CXX11_ATOMICS
  45 #endif
  46
  47 namespace mozilla {
  48
  49 /**
  50  * An enum of memory ordering possibilities for atomics.
  51  *
  52  * Memory ordering is the observable state of distinct values in memory.
  53  * (It's a separate concept from atomicity, which concerns whether an
  54  * operation can ever be observed in an intermediate state.  Don't
  55  * conflate the two!)  Given a sequence of operations in source code on
  56  * memory, it is *not* always the case that, at all times and on all
  57  * cores, those operations will appear to have occurred in that exact
  58  * sequence.  First, the compiler might reorder that sequence, if it
  59  * thinks another ordering will be more efficient.  Second, the CPU may
  60  * not expose so consistent a view of memory.  CPUs will often perform
  61  * their own instruction reordering, above and beyond that performed by
  62  * the compiler.  And each core has its own memory caches, and accesses
  63  * (reads and writes both) to "memory" may only resolve to out-of-date
  64  * cache entries -- not to the "most recently" performed operation in
  65  * some global sense.  Any access to a value that may be used by
  66  * multiple threads, potentially across multiple cores, must therefore
  67  * have a memory ordering imposed on it, for all code on all
  68  * threads/cores to have a sufficiently coherent worldview.
  69  *
  70  * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
  71  * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
  72  * detail on all this, including examples of how each mode works.
  73  *
  74  * Note that for simplicity and practicality, not all of the modes in
  75  * C++11 are supported.  The missing C++11 modes are either subsumed by
  76  * the modes we provide below, or not relevant for the CPUs we support
  77  * in Gecko.  These three modes are confusing enough as it is!
  78  */
  79 enum MemoryOrdering {
  80   /*
  81    * Relaxed ordering is the simplest memory ordering: none at all.
  82    * When the result of a write is observed, nothing may be inferred
  83    * about other memory.  Writes ostensibly performed "before" on the
  84    * writing thread may not yet be visible.  Writes performed "after" on
  85    * the writing thread may already be visible, if the compiler or CPU
  86    * reordered them.  (The latter can happen if reads and/or writes get
  87    * held up in per-processor caches.)  Relaxed ordering means
  88    * operations can always use cached values (as long as the actual
  89    * updates to atomic values actually occur, correctly, eventually), so
  90    * it's usually the fastest sort of atomic access.  For this reason,
  91    * *it's also the most dangerous kind of access*.
  92    *
  93    * Relaxed ordering is good for things like process-wide statistics
  94    * counters that don't need to be consistent with anything else, so
  95    * long as updates themselves are atomic.  (And so long as any
  96    * observations of that value can tolerate being out-of-date -- if you
  97    * need some sort of up-to-date value, you need some sort of other
  98    * synchronizing operation.)  It's *not* good for locks, mutexes,
  99    * reference counts, etc. that mediate access to other memory, or must
 100    * be observably consistent with other memory.
 101    *
 102    * x86 architectures don't take advantage of the optimization
 103    * opportunities that relaxed ordering permits.  Thus it's possible
 104    * that using relaxed ordering will "work" on x86 but fail elsewhere
 105    * (ARM, say, which *does* implement non-sequentially-consistent
 106    * relaxed ordering semantics).  Be extra-careful using relaxed
 107    * ordering if you can't easily test non-x86 architectures!
 108    */
 109   Relaxed,
 110   /*
 111    * When an atomic value is updated with ReleaseAcquire ordering, and
 112    * that new value is observed with ReleaseAcquire ordering, prior
 113    * writes (atomic or not) are also observable.  What ReleaseAcquire
 114    * *doesn't* give you is any observable ordering guarantees for
 115    * ReleaseAcquire-ordered operations on different objects.  For
 116    * example, if there are two cores that each perform ReleaseAcquire
 117    * operations on separate objects, each core may or may not observe
 118    * the operations made by the other core.  The only way the cores can
 119    * be synchronized with ReleaseAcquire is if they both
 120    * ReleaseAcquire-access the same object.  This implies that you can't
 121    * necessarily describe some global total ordering of ReleaseAcquire
 122    * operations.
 123    *
 124    * ReleaseAcquire ordering is good for (as the name implies) atomic
 125    * operations on values controlling ownership of things: reference
 126    * counts, mutexes, and the like.  However, if you are thinking about
 127    * using these to implement your own locks or mutexes, you should take
 128    * a good, hard look at actual lock or mutex primitives first.
 129    */
 130   ReleaseAcquire,
 131   /*
 132    * When an atomic value is updated with SequentiallyConsistent
 133    * ordering, all writes observable when the update is observed, just
 134    * as with ReleaseAcquire ordering.  But, furthermore, a global total
 135    * ordering of SequentiallyConsistent operations *can* be described.
 136    * For example, if two cores perform SequentiallyConsistent operations
 137    * on separate objects, one core will observably perform its update
 138    * (and all previous operations will have completed), then the other
 139    * core will observably perform its update (and all previous
 140    * operations will have completed).  (Although those previous
 141    * operations aren't themselves ordered -- they could be intermixed,
 142    * or ordered if they occur on atomic values with ordering
 143    * requirements.)  SequentiallyConsistent is the *simplest and safest*
 144    * ordering of atomic operations -- it's always as if one operation
 145    * happens, then another, then another, in some order -- and every
 146    * core observes updates to happen in that single order.  Because it
 147    * has the most synchronization requirements, operations ordered this
 148    * way also tend to be slowest.
 149    *
 150    * SequentiallyConsistent ordering can be desirable when multiple
 151    * threads observe objects, and they all have to agree on the
 152    * observable order of changes to them.  People expect
 153    * SequentiallyConsistent ordering, even if they shouldn't, when
 154    * writing code, atomic or otherwise.  SequentiallyConsistent is also
 155    * the ordering of choice when designing lockless data structures.  If
 156    * you don't know what order to use, use this one.
 157    */
 158   SequentiallyConsistent,
 159 };
 160
 161 } // namespace mozilla
 162
 163 // Build up the underlying intrinsics.
 164 #ifdef MOZ_HAVE_CXX11_ATOMICS
 165
 166 #  include <atomic>
 167
 168 namespace mozilla {
 169 namespace detail {
 170
 171 /*
 172  * We provide CompareExchangeFailureOrder to work around a bug in some
 173  * versions of GCC's <atomic> header.  See bug 898491.
 174  */
 175 template<MemoryOrdering Order> struct AtomicOrderConstraints;
 176
 177 template<>
 178 struct AtomicOrderConstraints<Relaxed>
 179 {
 180     static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
 181     static const std::memory_order LoadOrder = std::memory_order_relaxed;
 182     static const std::memory_order StoreOrder = std::memory_order_relaxed;
 183     static const std::memory_order CompareExchangeFailureOrder =
 184       std::memory_order_relaxed;
 185 };
 186
 187 template<>
 188 struct AtomicOrderConstraints<ReleaseAcquire>
 189 {
 190     static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
 191     static const std::memory_order LoadOrder = std::memory_order_acquire;
 192     static const std::memory_order StoreOrder = std::memory_order_release;
 193     static const std::memory_order CompareExchangeFailureOrder =
 194       std::memory_order_acquire;
 195 };
 196
 197 template<>
 198 struct AtomicOrderConstraints<SequentiallyConsistent>
 199 {
 200     static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
 201     static const std::memory_order LoadOrder = std::memory_order_seq_cst;
 202     static const std::memory_order StoreOrder = std::memory_order_seq_cst;
 203     static const std::memory_order CompareExchangeFailureOrder =
 204       std::memory_order_seq_cst;
 205 };
 206
 207 template<typename T, MemoryOrdering Order>
 208 struct IntrinsicBase
 209 {
 210     typedef std::atomic<T> ValueType;
 211     typedef AtomicOrderConstraints<Order> OrderedOp;
 212 };
 213
 214 template<typename T, MemoryOrdering Order>
 215 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
 216 {
 217     typedef IntrinsicBase<T, Order> Base;
 218     static T load(const typename Base::ValueType& ptr) {
 219       return ptr.load(Base::OrderedOp::LoadOrder);
 220     }
 221     static void store(typename Base::ValueType& ptr, T val) {
 222       ptr.store(val, Base::OrderedOp::StoreOrder);
 223     }
 224     static T exchange(typename Base::ValueType& ptr, T val) {
 225       return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder);
 226     }
 227     static bool compareExchange(typename Base::ValueType& ptr, T oldVal, T newVal) {
 228       return ptr.compare_exchange_strong(oldVal, newVal,
 229                                          Base::OrderedOp::AtomicRMWOrder,
 230                                          Base::OrderedOp::CompareExchangeFailureOrder);
 231     }
 232 };
 233
 234 template<typename T, MemoryOrdering Order>
 235 struct IntrinsicAddSub : public IntrinsicBase<T, Order>
 236 {
 237     typedef IntrinsicBase<T, Order> Base;
 238     static T add(typename Base::ValueType& ptr, T val) {
 239       return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder);
 240     }
 241     static T sub(typename Base::ValueType& ptr, T val) {
 242       return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder);
 243     }
 244 };
 245
 246 template<typename T, MemoryOrdering Order>
 247 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
 248 {
 249     typedef IntrinsicBase<T*, Order> Base;
 250     static T* add(typename Base::ValueType& ptr, ptrdiff_t val) {
 251       return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
 252     }
 253     static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) {
 254       return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
 255     }
 256   private:
 257     /*
 258      * GCC 4.6's <atomic> header has a bug where adding X to an
 259      * atomic<T*> is not the same as adding X to a T*.  Hence the need
 260      * for this function to provide the correct addend.
 261      */
 262     static ptrdiff_t fixupAddend(ptrdiff_t val) {
 263 #if defined(__clang__) || defined(_MSC_VER)
 264       return val;
 265 #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
 266       !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
 267       return val * sizeof(T);
 268 #else
 269       return val;
 270 #endif
 271     }
 272 };
 273
 274 template<typename T, MemoryOrdering Order>
 275 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
 276 {
 277     typedef IntrinsicBase<T, Order> Base;
 278     static T inc(typename Base::ValueType& ptr) {
 279       return IntrinsicAddSub<T, Order>::add(ptr, 1);
 280     }
 281     static T dec(typename Base::ValueType& ptr) {
 282       return IntrinsicAddSub<T, Order>::sub(ptr, 1);
 283     }
 284 };
 285
 286 template<typename T, MemoryOrdering Order>
 287 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 288                           public IntrinsicIncDec<T, Order>
 289 {
 290     typedef IntrinsicBase<T, Order> Base;
 291     static T or_(typename Base::ValueType& ptr, T val) {
 292       return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder);
 293     }
 294     static T xor_(typename Base::ValueType& ptr, T val) {
 295       return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder);
 296     }
 297     static T and_(typename Base::ValueType& ptr, T val) {
 298       return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder);
 299     }
 300 };
 301
 302 template<typename T, MemoryOrdering Order>
 303 struct AtomicIntrinsics<T*, Order>
 304   : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
 305 {
 306 };
 307
 308 } // namespace detail
 309 } // namespace mozilla
 310
 311 #elif defined(__GNUC__)
 312
 313 namespace mozilla {
 314 namespace detail {
 315
 316 /*
 317  * The __sync_* family of intrinsics is documented here:
 318  *
 319  * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
 320  *
 321  * While these intrinsics are deprecated in favor of the newer __atomic_*
  322  * family of intrinsics:
 323  *
 324  * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
 325  *
 326  * any GCC version that supports the __atomic_* intrinsics will also support
 327  * the <atomic> header and so will be handled above.  We provide a version of
 328  * atomics using the __sync_* intrinsics to support older versions of GCC.
 329  *
 330  * All __sync_* intrinsics that we use below act as full memory barriers, for
 331  * both compiler and hardware reordering, except for __sync_lock_test_and_set,
  332  * which is only an acquire barrier.  When we call __sync_lock_test_and_set,
 333  * we add a barrier above it as appropriate.
 334  */
 335
 336 template<MemoryOrdering Order> struct Barrier;
 337
 338 /*
 339  * Some processors (in particular, x86) don't require quite so many calls to
 340  * __sync_sychronize as our specializations of Barrier produce.  If
 341  * performance turns out to be an issue, defining these specializations
 342  * on a per-processor basis would be a good first tuning step.
 343  */
 344
 345 template<>
 346 struct Barrier<Relaxed>
 347 {
 348     static void beforeLoad() {}
 349     static void afterLoad() {}
 350     static void beforeStore() {}
 351     static void afterStore() {}
 352 };
 353
 354 template<>
 355 struct Barrier<ReleaseAcquire>
 356 {
 357     static void beforeLoad() {}
 358     static void afterLoad() { __sync_synchronize(); }
 359     static void beforeStore() { __sync_synchronize(); }
 360     static void afterStore() {}
 361 };
 362
 363 template<>
 364 struct Barrier<SequentiallyConsistent>
 365 {
 366     static void beforeLoad() { __sync_synchronize(); }
 367     static void afterLoad() { __sync_synchronize(); }
 368     static void beforeStore() { __sync_synchronize(); }
 369     static void afterStore() { __sync_synchronize(); }
 370 };
 371
 372 template<typename T, MemoryOrdering Order>
 373 struct IntrinsicMemoryOps
 374 {
 375     static T load(const T& ptr) {
 376       Barrier<Order>::beforeLoad();
 377       T val = ptr;
 378       Barrier<Order>::afterLoad();
 379       return val;
 380     }
 381     static void store(T& ptr, T val) {
 382       Barrier<Order>::beforeStore();
 383       ptr = val;
 384       Barrier<Order>::afterStore();
 385     }
 386     static T exchange(T& ptr, T val) {
 387       // __sync_lock_test_and_set is only an acquire barrier; loads and stores
 388       // can't be moved up from after to before it, but they can be moved down
 389       // from before to after it.  We may want a stricter ordering, so we need
 390       // an explicit barrier.
 391
 392       Barrier<Order>::beforeStore();
 393       return __sync_lock_test_and_set(&ptr, val);
 394     }
 395     static bool compareExchange(T& ptr, T oldVal, T newVal) {
 396       return __sync_bool_compare_and_swap(&ptr, oldVal, newVal);
 397     }
 398 };
 399
 400 template<typename T>
 401 struct IntrinsicAddSub
 402 {
 403     typedef T ValueType;
 404     static T add(T& ptr, T val) {
 405       return __sync_fetch_and_add(&ptr, val);
 406     }
 407     static T sub(T& ptr, T val) {
 408       return __sync_fetch_and_sub(&ptr, val);
 409     }
 410 };
 411
 412 template<typename T>
 413 struct IntrinsicAddSub<T*>
 414 {
 415     typedef T* ValueType;
 416     /*
 417      * The reinterpret_casts are needed so that
 418      * __sync_fetch_and_{add,sub} will properly type-check.
 419      *
 420      * Also, these functions do not provide standard semantics for
 421      * pointer types, so we need to adjust the addend.
 422      */
 423     static ValueType add(ValueType& ptr, ptrdiff_t val) {
 424       ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
 425       return __sync_fetch_and_add(&ptr, amount);
 426     }
 427     static ValueType sub(ValueType& ptr, ptrdiff_t val) {
 428       ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
 429       return __sync_fetch_and_sub(&ptr, amount);
 430     }
 431 };
 432
 433 template<typename T>
 434 struct IntrinsicIncDec : public IntrinsicAddSub<T>
 435 {
 436     static T inc(T& ptr) { return IntrinsicAddSub<T>::add(ptr, 1); }
 437     static T dec(T& ptr) { return IntrinsicAddSub<T>::sub(ptr, 1); }
 438 };
 439
 440 template<typename T, MemoryOrdering Order>
 441 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 442                           public IntrinsicIncDec<T>
 443 {
 444     static T or_(T& ptr, T val) {
 445       return __sync_fetch_and_or(&ptr, val);
 446     }
 447     static T xor_(T& ptr, T val) {
 448       return __sync_fetch_and_xor(&ptr, val);
 449     }
 450     static T and_(T& ptr, T val) {
 451       return __sync_fetch_and_and(&ptr, val);
 452     }
 453 };
 454
 455 template<typename T, MemoryOrdering Order>
 456 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
 457                                      public IntrinsicIncDec<T*>
 458 {
 459 };
 460
 461 } // namespace detail
 462 } // namespace mozilla
 463
 464 #elif defined(_MSC_VER)
 465
 466 /*
 467  * Windows comes with a full complement of atomic operations.
 468  * Unfortunately, most of those aren't available for Windows XP (even if
 469  * the compiler supports intrinsics for them), which is the oldest
 470  * version of Windows we support.  Therefore, we only provide operations
 471  * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
 472  * versions, we support 64-bit datatypes as well.
 473  *
 474  * To avoid namespace pollution issues, we declare whatever functions we
 475  * need ourselves.
 476  */
 477
 478 extern "C" {
 479 long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value);
 480 long __cdecl _InterlockedOr(long volatile* dst, long value);
 481 long __cdecl _InterlockedXor(long volatile* dst, long value);
 482 long __cdecl _InterlockedAnd(long volatile* dst, long value);
 483 long __cdecl _InterlockedExchange(long volatile *dst, long value);
 484 long __cdecl _InterlockedCompareExchange(long volatile *dst, long newVal, long oldVal);
 485 }
 486
 487 #  pragma intrinsic(_InterlockedExchangeAdd)
 488 #  pragma intrinsic(_InterlockedOr)
 489 #  pragma intrinsic(_InterlockedXor)
 490 #  pragma intrinsic(_InterlockedAnd)
 491 #  pragma intrinsic(_InterlockedExchange)
 492 #  pragma intrinsic(_InterlockedCompareExchange)
 493
 494 namespace mozilla {
 495 namespace detail {
 496
 497 #  if !defined(_M_IX86) && !defined(_M_X64)
 498      /*
 499       * The implementations below are optimized for x86ish systems.  You
 500       * will have to modify them if you are porting to Windows on a
 501       * different architecture.
 502       */
 503 #    error "Unknown CPU type"
 504 #  endif
 505
 506 /*
 507  * The PrimitiveIntrinsics template should define |Type|, the datatype of size
 508  * DataSize upon which we operate, and the following eight functions.
 509  *
 510  * static Type add(Type* ptr, Type val);
 511  * static Type sub(Type* ptr, Type val);
 512  * static Type or_(Type* ptr, Type val);
 513  * static Type xor_(Type* ptr, Type val);
 514  * static Type and_(Type* ptr, Type val);
 515  *
 516  *   These functions perform the obvious operation on the value contained in
 517  *   |*ptr| combined with |val| and return the value previously stored in
 518  *   |*ptr|.
 519  *
 520  * static void store(Type* ptr, Type val);
 521  *
 522  *   This function atomically stores |val| into |*ptr| and must provide a full
 523  *   memory fence after the store to prevent compiler and hardware instruction
 524  *   reordering.  It should also act as a compiler barrier to prevent reads and
 525  *   writes from moving to after the store.
 526  *
 527  * static Type exchange(Type* ptr, Type val);
 528  *
 529  *   This function atomically stores |val| into |*ptr| and returns the previous
 530  *   contents of *ptr;
 531  *
 532  * static bool compareExchange(Type* ptr, Type oldVal, Type newVal);
 533  *
 534  *   This function atomically performs the following operation:
 535  *
 536  *     if (*ptr == oldVal) {
 537  *       *ptr = newVal;
 538  *       return true;
 539  *     } else {
 540  *       return false;
 541  *     }
 542  *
 543  */
 544 template<size_t DataSize> struct PrimitiveIntrinsics;
 545
 546 template<>
 547 struct PrimitiveIntrinsics<4>
 548 {
 549     typedef long Type;
 550
 551     static Type add(Type* ptr, Type val) {
 552       return _InterlockedExchangeAdd(ptr, val);
 553     }
 554     static Type sub(Type* ptr, Type val) {
 555       /*
 556        * _InterlockedExchangeSubtract isn't available before Windows 7,
 557        * and we must support Windows XP.
 558        */
 559       return _InterlockedExchangeAdd(ptr, -val);
 560     }
 561     static Type or_(Type* ptr, Type val) {
 562       return _InterlockedOr(ptr, val);
 563     }
 564     static Type xor_(Type* ptr, Type val) {
 565       return _InterlockedXor(ptr, val);
 566     }
 567     static Type and_(Type* ptr, Type val) {
 568       return _InterlockedAnd(ptr, val);
 569     }
 570     static void store(Type* ptr, Type val) {
 571       _InterlockedExchange(ptr, val);
 572     }
 573     static Type exchange(Type* ptr, Type val) {
 574       return _InterlockedExchange(ptr, val);
 575     }
 576     static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
 577       return _InterlockedCompareExchange(ptr, newVal, oldVal) == oldVal;
 578     }
 579 };
 580
 581 #  if defined(_M_X64)
 582
 583 extern "C" {
 584 long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst,
 585                                             long long value);
 586 long long __cdecl _InterlockedOr64(long long volatile* dst,
 587                                    long long value);
 588 long long __cdecl _InterlockedXor64(long long volatile* dst,
 589                                     long long value);
 590 long long __cdecl _InterlockedAnd64(long long volatile* dst,
 591                                     long long value);
 592 long long __cdecl _InterlockedExchange64(long long volatile* dst,
 593                                          long long value);
 594 long long __cdecl _InterlockedCompareExchange64(long long volatile* dst,
 595                                                 long long newVal,
 596                                                 long long oldVal);
 597 }
 598
 599 #    pragma intrinsic(_InterlockedExchangeAdd64)
 600 #    pragma intrinsic(_InterlockedOr64)
 601 #    pragma intrinsic(_InterlockedXor64)
 602 #    pragma intrinsic(_InterlockedAnd64)
 603 #    pragma intrinsic(_InterlockedExchange64)
 604 #    pragma intrinsic(_InterlockedCompareExchange64)
 605
 606 template <>
 607 struct PrimitiveIntrinsics<8>
 608 {
 609     typedef __int64 Type;
 610
 611     static Type add(Type* ptr, Type val) {
 612       return _InterlockedExchangeAdd64(ptr, val);
 613     }
 614     static Type sub(Type* ptr, Type val) {
 615       /*
 616        * There is no _InterlockedExchangeSubtract64.
 617        */
 618       return _InterlockedExchangeAdd64(ptr, -val);
 619     }
 620     static Type or_(Type* ptr, Type val) {
 621       return _InterlockedOr64(ptr, val);
 622     }
 623     static Type xor_(Type* ptr, Type val) {
 624       return _InterlockedXor64(ptr, val);
 625     }
 626     static Type and_(Type* ptr, Type val) {
 627       return _InterlockedAnd64(ptr, val);
 628     }
 629     static void store(Type* ptr, Type val) {
 630       _InterlockedExchange64(ptr, val);
 631     }
 632     static Type exchange(Type* ptr, Type val) {
 633       return _InterlockedExchange64(ptr, val);
 634     }
 635     static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
 636       return _InterlockedCompareExchange64(ptr, newVal, oldVal) == oldVal;
 637     }
 638 };
 639
 640 #  endif
 641
 642 extern "C" { void _ReadWriteBarrier(); }
 643
 644 #  pragma intrinsic(_ReadWriteBarrier)
 645
 646 template<MemoryOrdering Order> struct Barrier;
 647
 648 /*
 649  * We do not provide an afterStore method in Barrier, as Relaxed and
 650  * ReleaseAcquire orderings do not require one, and the required barrier
 651  * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
 652  */
 653
 654 template<>
 655 struct Barrier<Relaxed>
 656 {
 657     static void beforeLoad() {}
 658     static void afterLoad() {}
 659     static void beforeStore() {}
 660 };
 661
 662 template<>
 663 struct Barrier<ReleaseAcquire>
 664 {
 665     static void beforeLoad() {}
 666     static void afterLoad() { _ReadWriteBarrier(); }
 667     static void beforeStore() { _ReadWriteBarrier(); }
 668 };
 669
 670 template<>
 671 struct Barrier<SequentiallyConsistent>
 672 {
 673     static void beforeLoad() { _ReadWriteBarrier(); }
 674     static void afterLoad() { _ReadWriteBarrier(); }
 675     static void beforeStore() { _ReadWriteBarrier(); }
 676 };
 677
 678 template<typename PrimType, typename T>
 679 struct CastHelper
 680 {
 681   static PrimType toPrimType(T val) { return static_cast<PrimType>(val); }
 682   static T fromPrimType(PrimType val) { return static_cast<T>(val); }
 683 };
 684
 685 template<typename PrimType, typename T>
 686 struct CastHelper<PrimType, T*>
 687 {
 688   static PrimType toPrimType(T* val) { return reinterpret_cast<PrimType>(val); }
 689   static T* fromPrimType(PrimType val) { return reinterpret_cast<T*>(val); }
 690 };
 691
 692 template<typename T>
 693 struct IntrinsicBase
 694 {
 695     typedef T ValueType;
 696     typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
 697     typedef typename Primitives::Type PrimType;
 698     static_assert(sizeof(PrimType) == sizeof(T),
 699                   "Selection of PrimitiveIntrinsics was wrong");
 700     typedef CastHelper<PrimType, T> Cast;
 701 };
 702
 703 template<typename T, MemoryOrdering Order>
 704 struct IntrinsicMemoryOps : public IntrinsicBase<T>
 705 {
 706     typedef typename IntrinsicBase<T>::ValueType ValueType;
 707     typedef typename IntrinsicBase<T>::Primitives Primitives;
 708     typedef typename IntrinsicBase<T>::PrimType PrimType;
 709     typedef typename IntrinsicBase<T>::Cast Cast;
 710     static ValueType load(const ValueType& ptr) {
 711       Barrier<Order>::beforeLoad();
 712       ValueType val = ptr;
 713       Barrier<Order>::afterLoad();
 714       return val;
 715     }
 716     static void store(ValueType& ptr, ValueType val) {
 717       // For SequentiallyConsistent, Primitives::store() will generate the
 718       // proper memory fence.  Everything else just needs a barrier before
 719       // the store.
 720       if (Order == SequentiallyConsistent) {
 721         Primitives::store(reinterpret_cast<PrimType*>(&ptr),
 722                           Cast::toPrimType(val));
 723       } else {
 724         Barrier<Order>::beforeStore();
 725         ptr = val;
 726       }
 727     }
 728     static ValueType exchange(ValueType& ptr, ValueType val) {
 729       PrimType oldval =
 730         Primitives::exchange(reinterpret_cast<PrimType*>(&ptr),
 731                              Cast::toPrimType(val));
 732       return Cast::fromPrimType(oldval);
 733     }
 734     static bool compareExchange(ValueType& ptr, ValueType oldVal, ValueType newVal) {
 735       return Primitives::compareExchange(reinterpret_cast<PrimType*>(&ptr),
 736                                          Cast::toPrimType(oldVal),
 737                                          Cast::toPrimType(newVal));
 738     }
 739 };
 740
 741 template<typename T>
 742 struct IntrinsicApplyHelper : public IntrinsicBase<T>
 743 {
 744     typedef typename IntrinsicBase<T>::ValueType ValueType;
 745     typedef typename IntrinsicBase<T>::PrimType PrimType;
 746     typedef typename IntrinsicBase<T>::Cast Cast;
 747     typedef PrimType (*BinaryOp)(PrimType*, PrimType);
 748     typedef PrimType (*UnaryOp)(PrimType*);
 749
 750     static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr,
 751                                          ValueType val) {
 752       PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
 753       PrimType primTypeVal = Cast::toPrimType(val);
 754       return Cast::fromPrimType(op(primTypePtr, primTypeVal));
 755     }
 756
 757     static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) {
 758       PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
 759       return Cast::fromPrimType(op(primTypePtr));
 760     }
 761 };
 762
 763 template<typename T>
 764 struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
 765 {
 766     typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
 767     typedef typename IntrinsicBase<T>::Primitives Primitives;
 768     static ValueType add(ValueType& ptr, ValueType val) {
 769       return applyBinaryFunction(&Primitives::add, ptr, val);
 770     }
 771     static ValueType sub(ValueType& ptr, ValueType val) {
 772       return applyBinaryFunction(&Primitives::sub, ptr, val);
 773     }
 774 };
 775
 776 template<typename T>
 777 struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
 778 {
 779     typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
 780     static ValueType add(ValueType& ptr, ptrdiff_t amount) {
 781       return applyBinaryFunction(&Primitives::add, ptr,
 782                                  (ValueType)(amount * sizeof(ValueType)));
 783     }
 784     static ValueType sub(ValueType& ptr, ptrdiff_t amount) {
 785       return applyBinaryFunction(&Primitives::sub, ptr,
 786                                  (ValueType)(amount * sizeof(ValueType)));
 787     }
 788 };
 789
 790 template<typename T>
 791 struct IntrinsicIncDec : public IntrinsicAddSub<T>
 792 {
 793     typedef typename IntrinsicAddSub<T>::ValueType ValueType;
 794     static ValueType inc(ValueType& ptr) { return add(ptr, 1); }
 795     static ValueType dec(ValueType& ptr) { return sub(ptr, 1); }
 796 };
 797
 798 template<typename T, MemoryOrdering Order>
 799 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
 800                           public IntrinsicIncDec<T>
 801 {
 802     typedef typename IntrinsicIncDec<T>::ValueType ValueType;
 803     static ValueType or_(ValueType& ptr, T val) {
 804       return applyBinaryFunction(&Primitives::or_, ptr, val);
 805     }
 806     static ValueType xor_(ValueType& ptr, T val) {
 807       return applyBinaryFunction(&Primitives::xor_, ptr, val);
 808     }
 809     static ValueType and_(ValueType& ptr, T val) {
 810       return applyBinaryFunction(&Primitives::and_, ptr, val);
 811     }
 812 };
 813
 814 template<typename T, MemoryOrdering Order>
 815 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
 816                                      public IntrinsicIncDec<T*>
 817 {
 818     typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
 819 };
 820
 821 } // namespace detail
 822 } // namespace mozilla
 823
 824 #else
 825 # error "Atomic compiler intrinsics are not supported on your platform"
 826 #endif
 827
 828 namespace mozilla {
 829
 830 namespace detail {
 831
 832 template<typename T, MemoryOrdering Order>
 833 class AtomicBase
 834 {
 835     // We only support 32-bit types on 32-bit Windows, which constrains our
 836     // implementation elsewhere.  But we support pointer-sized types everywhere.
 837     static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
 838                   "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
 839
 840   protected:
 841     typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
 842     typename Intrinsics::ValueType mValue;
 843
 844   public:
 845     MOZ_CONSTEXPR AtomicBase() : mValue() {}
 846     MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}
 847
 848     operator T() const { return Intrinsics::load(mValue); }
 849
 850     T operator=(T aValue) {
 851       Intrinsics::store(mValue, aValue);
 852       return aValue;
 853     }
 854
 855     /**
 856      * Performs an atomic swap operation.  aValue is stored and the previous
 857      * value of this variable is returned.
 858      */
 859     T exchange(T aValue) {
 860       return Intrinsics::exchange(mValue, aValue);
 861     }
 862
 863     /**
 864      * Performs an atomic compare-and-swap operation and returns true if it
 865      * succeeded. This is equivalent to atomically doing
 866      *
 867      *   if (mValue == aOldValue) {
 868      *     mValue = aNewValue;
 869      *     return true;
 870      *   } else {
 871      *     return false;
 872      *   }
 873      */
 874     bool compareExchange(T aOldValue, T aNewValue) {
 875       return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
 876     }
 877
 878   private:
 879     template<MemoryOrdering AnyOrder>
 880     AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
 881 };
 882
 883 template<typename T, MemoryOrdering Order>
 884 class AtomicBaseIncDec : public AtomicBase<T, Order>
 885 {
 886     typedef typename detail::AtomicBase<T, Order> Base;
 887
 888   public:
 889     MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
 890     MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}
 891
 892     using Base::operator=;
 893
 894     T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
 895     T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
 896     T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
 897     T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
 898
 899   private:
 900     template<MemoryOrdering AnyOrder>
 901     AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
 902 };
 903
 904 } // namespace detail
 905
/**
 * A wrapper for a type that enforces that all memory accesses are atomic.
 *
 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 * its place.  Implementations for integral, pointer and enum types are
 * provided below.
 *
 * Atomic accesses are sequentially consistent by default.  You should
 * use the default unless you are tall enough to ride the
 * memory-ordering roller coaster (if you're not sure, you aren't) and
 * you have a compelling reason to do otherwise.
 *
 * There is one exception to the rule that every access is atomic: providing
 * an initial value for the atomic variable is not guaranteed to be atomic.
 * This is a deliberate design choice that enables static atomic variables to
 * be declared without introducing extra static constructors.
 *
 * Template parameters:
 *   T      - the type being wrapped.
 *   Order  - the MemoryOrdering used for accesses; defaults to
 *            SequentiallyConsistent.
 *   Enable - used by the partial specializations below (via EnableIf) to
 *            select an implementation; leave it at its default.
 */
template<typename T,
         MemoryOrdering Order = SequentiallyConsistent,
         typename Enable = void>
class Atomic;
 927
 928 /**
 929  * Atomic<T> implementation for integral types.
 930  *
 931  * In addition to atomic store and load operations, compound assignment and
 932  * increment/decrement operators are implemented which perform the
 933  * corresponding read-modify-write operation atomically.  Finally, an atomic
 934  * swap method is provided.
 935  */
 936 template<typename T, MemoryOrdering Order>
 937 class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value>::Type>
 938   : public detail::AtomicBaseIncDec<T, Order>
 939 {
 940     typedef typename detail::AtomicBaseIncDec<T, Order> Base;
 941
 942   public:
 943     MOZ_CONSTEXPR Atomic() : Base() {}
 944     MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
 945
 946     using Base::operator=;
 947
 948     T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; }
 949     T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; }
 950     T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; }
 951     T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; }
 952     T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; }
 953
 954   private:
 955     Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
 956 };
 957
 958 /**
 959  * Atomic<T> implementation for pointer types.
 960  *
 * An atomic compare-and-swap primitive for pointer variables is provided, as
 * are atomic increment and decrement operators.  Also provided are the compound
 * assignment operators for addition and subtraction. Atomic swap (via
 * exchange()) is included as well.
 965  */
 966 template<typename T, MemoryOrdering Order>
 967 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
 968 {
 969     typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
 970
 971   public:
 972     MOZ_CONSTEXPR Atomic() : Base() {}
 973     MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}
 974
 975     using Base::operator=;
 976
 977     T* operator+=(ptrdiff_t delta) {
 978       return Base::Intrinsics::add(Base::mValue, delta) + delta;
 979     }
 980     T* operator-=(ptrdiff_t delta) {
 981       return Base::Intrinsics::sub(Base::mValue, delta) - delta;
 982     }
 983
 984   private:
 985     Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
 986 };
 987
 988 /**
 989  * Atomic<T> implementation for enum types.
 990  *
 * The atomic store and load operations, the atomic swap method and the atomic
 * compare-and-swap method are provided.
 992  */
 993 template<typename T, MemoryOrdering Order>
 994 class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
 995   : public detail::AtomicBase<T, Order>
 996 {
 997     typedef typename detail::AtomicBase<T, Order> Base;
 998
 999   public:
1000     MOZ_CONSTEXPR Atomic() : Base() {}
1001     MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1002
1003     using Base::operator=;
1004
1005   private:
1006     Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1007 };
1008
1009 } // namespace mozilla
1010
1011 #endif /* mozilla_Atomics_h */