Merge mozilla-b2g34 to 2.1s. a=merge
[gecko.git] / mfbt / Atomics.h
blob71d95c61bac74e966432c0b6cd5af12b0b580009
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /*
8 * Implements (almost always) lock-free atomic operations. The operations here
9 * are a subset of that which can be found in C++11's <atomic> header, with a
10 * different API to enforce consistent memory ordering constraints.
12 * Anyone caught using |volatile| for inter-thread memory safety needs to be
13 * sent a copy of this header and the C++11 standard.
16 #ifndef mozilla_Atomics_h
17 #define mozilla_Atomics_h
19 #include "mozilla/Assertions.h"
20 #include "mozilla/Attributes.h"
21 #include "mozilla/Compiler.h"
22 #include "mozilla/TypeTraits.h"
24 #include <stdint.h>
27 * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
28 * does not have <atomic>. So be sure to check for <atomic> support
29 * along with C++0x support.
31 #if defined(__clang__) || defined(__GNUC__)
33 * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
34 * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline
35 * definitions for unspecialized std::atomic and causes linking errors.
36 * Therefore, we require at least 4.7.0 for using libstdc++.
38 # if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
39 # define MOZ_HAVE_CXX11_ATOMICS
40 # elif MOZ_USING_LIBCXX
41 # define MOZ_HAVE_CXX11_ATOMICS
42 # endif
43 #elif defined(_MSC_VER) && _MSC_VER >= 1700
44 # if defined(DEBUG)
46 * Provide our own failure code since we're having trouble linking to
47 * std::_Debug_message (bug 982310).
49 # define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order")
50 # endif
51 # define MOZ_HAVE_CXX11_ATOMICS
52 #endif
54 namespace mozilla {
56 /**
57 * An enum of memory ordering possibilities for atomics.
59 * Memory ordering is the observable state of distinct values in memory.
60 * (It's a separate concept from atomicity, which concerns whether an
61 * operation can ever be observed in an intermediate state. Don't
62 * conflate the two!) Given a sequence of operations in source code on
63 * memory, it is *not* always the case that, at all times and on all
64 * cores, those operations will appear to have occurred in that exact
65 * sequence. First, the compiler might reorder that sequence, if it
66 * thinks another ordering will be more efficient. Second, the CPU may
67 * not expose so consistent a view of memory. CPUs will often perform
68 * their own instruction reordering, above and beyond that performed by
69 * the compiler. And each core has its own memory caches, and accesses
70 * (reads and writes both) to "memory" may only resolve to out-of-date
71 * cache entries -- not to the "most recently" performed operation in
72 * some global sense. Any access to a value that may be used by
73 * multiple threads, potentially across multiple cores, must therefore
74 * have a memory ordering imposed on it, for all code on all
75 * threads/cores to have a sufficiently coherent worldview.
77 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
78 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
79 * detail on all this, including examples of how each mode works.
81 * Note that for simplicity and practicality, not all of the modes in
82 * C++11 are supported. The missing C++11 modes are either subsumed by
83 * the modes we provide below, or not relevant for the CPUs we support
84 * in Gecko. These three modes are confusing enough as it is!
enum MemoryOrdering {
  /*
   * Relaxed ordering is the simplest memory ordering: none at all.
   * When the result of a write is observed, nothing may be inferred
   * about other memory.  Writes ostensibly performed "before" on the
   * writing thread may not yet be visible.  Writes performed "after" on
   * the writing thread may already be visible, if the compiler or CPU
   * reordered them.  (The latter can happen if reads and/or writes get
   * held up in per-processor caches.)  Relaxed ordering means
   * operations can always use cached values (as long as the actual
   * updates to atomic values actually occur, correctly, eventually), so
   * it's usually the fastest sort of atomic access.  For this reason,
   * *it's also the most dangerous kind of access*.
   *
   * Relaxed ordering is good for things like process-wide statistics
   * counters that don't need to be consistent with anything else, so
   * long as updates themselves are atomic.  (And so long as any
   * observations of that value can tolerate being out-of-date -- if you
   * need some sort of up-to-date value, you need some sort of other
   * synchronizing operation.)  It's *not* good for locks, mutexes,
   * reference counts, etc. that mediate access to other memory, or must
   * be observably consistent with other memory.
   *
   * x86 architectures don't take advantage of the optimization
   * opportunities that relaxed ordering permits.  Thus it's possible
   * that using relaxed ordering will "work" on x86 but fail elsewhere
   * (ARM, say, which *does* implement non-sequentially-consistent
   * relaxed ordering semantics).  Be extra-careful using relaxed
   * ordering if you can't easily test non-x86 architectures!
   */
  Relaxed,

  /*
   * When an atomic value is updated with ReleaseAcquire ordering, and
   * that new value is observed with ReleaseAcquire ordering, prior
   * writes (atomic or not) are also observable.  What ReleaseAcquire
   * *doesn't* give you is any observable ordering guarantees for
   * ReleaseAcquire-ordered operations on different objects.  For
   * example, if there are two cores that each perform ReleaseAcquire
   * operations on separate objects, each core may or may not observe
   * the operations made by the other core.  The only way the cores can
   * be synchronized with ReleaseAcquire is if they both
   * ReleaseAcquire-access the same object.  This implies that you can't
   * necessarily describe some global total ordering of ReleaseAcquire
   * operations.
   *
   * ReleaseAcquire ordering is good for (as the name implies) atomic
   * operations on values controlling ownership of things: reference
   * counts, mutexes, and the like.  However, if you are thinking about
   * using these to implement your own locks or mutexes, you should take
   * a good, hard look at actual lock or mutex primitives first.
   */
  ReleaseAcquire,

  /*
   * When an atomic value is updated with SequentiallyConsistent
   * ordering, all writes are observable when the update is observed,
   * just as with ReleaseAcquire ordering.  But, furthermore, a global
   * total ordering of SequentiallyConsistent operations *can* be
   * described.  For example, if two cores perform
   * SequentiallyConsistent operations on separate objects, one core
   * will observably perform its update (and all previous operations
   * will have completed), then the other core will observably perform
   * its update (and all previous operations will have completed).
   * (Although those previous operations aren't themselves ordered --
   * they could be intermixed, or ordered if they occur on atomic values
   * with ordering requirements.)  SequentiallyConsistent is the
   * *simplest and safest* ordering of atomic operations -- it's always
   * as if one operation happens, then another, then another, in some
   * order -- and every core observes updates to happen in that single
   * order.  Because it has the most synchronization requirements,
   * operations ordered this way also tend to be slowest.
   *
   * SequentiallyConsistent ordering can be desirable when multiple
   * threads observe objects, and they all have to agree on the
   * observable order of changes to them.  People expect
   * SequentiallyConsistent ordering, even if they shouldn't, when
   * writing code, atomic or otherwise.  SequentiallyConsistent is also
   * the ordering of choice when designing lockless data structures.  If
   * you don't know what order to use, use this one.
   */
  SequentiallyConsistent,
};
170 } // namespace mozilla
172 // Build up the underlying intrinsics.
173 #ifdef MOZ_HAVE_CXX11_ATOMICS
175 # include <atomic>
177 namespace mozilla {
178 namespace detail {
181 * We provide CompareExchangeFailureOrder to work around a bug in some
182 * versions of GCC's <atomic> header. See bug 898491.
184 template<MemoryOrdering Order> struct AtomicOrderConstraints;
186 template<>
187 struct AtomicOrderConstraints<Relaxed>
189 static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
190 static const std::memory_order LoadOrder = std::memory_order_relaxed;
191 static const std::memory_order StoreOrder = std::memory_order_relaxed;
192 static const std::memory_order CompareExchangeFailureOrder =
193 std::memory_order_relaxed;
196 template<>
197 struct AtomicOrderConstraints<ReleaseAcquire>
199 static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
200 static const std::memory_order LoadOrder = std::memory_order_acquire;
201 static const std::memory_order StoreOrder = std::memory_order_release;
202 static const std::memory_order CompareExchangeFailureOrder =
203 std::memory_order_acquire;
206 template<>
207 struct AtomicOrderConstraints<SequentiallyConsistent>
209 static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
210 static const std::memory_order LoadOrder = std::memory_order_seq_cst;
211 static const std::memory_order StoreOrder = std::memory_order_seq_cst;
212 static const std::memory_order CompareExchangeFailureOrder =
213 std::memory_order_seq_cst;
216 template<typename T, MemoryOrdering Order>
217 struct IntrinsicBase
219 typedef std::atomic<T> ValueType;
220 typedef AtomicOrderConstraints<Order> OrderedOp;
223 template<typename T, MemoryOrdering Order>
224 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
226 typedef IntrinsicBase<T, Order> Base;
228 static T load(const typename Base::ValueType& aPtr)
230 return aPtr.load(Base::OrderedOp::LoadOrder);
233 static void store(typename Base::ValueType& aPtr, T aVal)
235 aPtr.store(aVal, Base::OrderedOp::StoreOrder);
238 static T exchange(typename Base::ValueType& aPtr, T aVal)
240 return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
243 static bool compareExchange(typename Base::ValueType& aPtr,
244 T aOldVal, T aNewVal)
246 return aPtr.compare_exchange_strong(aOldVal, aNewVal,
247 Base::OrderedOp::AtomicRMWOrder,
248 Base::OrderedOp::CompareExchangeFailureOrder);
252 template<typename T, MemoryOrdering Order>
253 struct IntrinsicAddSub : public IntrinsicBase<T, Order>
255 typedef IntrinsicBase<T, Order> Base;
257 static T add(typename Base::ValueType& aPtr, T aVal)
259 return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
262 static T sub(typename Base::ValueType& aPtr, T aVal)
264 return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
268 template<typename T, MemoryOrdering Order>
269 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
271 typedef IntrinsicBase<T*, Order> Base;
273 static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal)
275 return aPtr.fetch_add(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
278 static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal)
280 return aPtr.fetch_sub(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
282 private:
284 * GCC 4.6's <atomic> header has a bug where adding X to an
285 * atomic<T*> is not the same as adding X to a T*. Hence the need
286 * for this function to provide the correct addend.
288 static ptrdiff_t fixupAddend(ptrdiff_t aVal)
290 #if defined(__clang__) || defined(_MSC_VER)
291 return aVal;
292 #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
293 !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
294 return aVal * sizeof(T);
295 #else
296 return aVal;
297 #endif
301 template<typename T, MemoryOrdering Order>
302 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
304 typedef IntrinsicBase<T, Order> Base;
306 static T inc(typename Base::ValueType& aPtr)
308 return IntrinsicAddSub<T, Order>::add(aPtr, 1);
311 static T dec(typename Base::ValueType& aPtr)
313 return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
317 template<typename T, MemoryOrdering Order>
318 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
319 public IntrinsicIncDec<T, Order>
321 typedef IntrinsicBase<T, Order> Base;
323 static T or_(typename Base::ValueType& aPtr, T aVal)
325 return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
328 static T xor_(typename Base::ValueType& aPtr, T aVal)
330 return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
333 static T and_(typename Base::ValueType& aPtr, T aVal)
335 return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
339 template<typename T, MemoryOrdering Order>
340 struct AtomicIntrinsics<T*, Order>
341 : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
345 } // namespace detail
346 } // namespace mozilla
348 #elif defined(__GNUC__)
350 namespace mozilla {
351 namespace detail {
354 * The __sync_* family of intrinsics is documented here:
356 * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
358 * While these intrinsics are deprecated in favor of the newer __atomic_*
359 * family of intrinsics:
361 * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
363 * any GCC version that supports the __atomic_* intrinsics will also support
364 * the <atomic> header and so will be handled above. We provide a version of
365 * atomics using the __sync_* intrinsics to support older versions of GCC.
367 * All __sync_* intrinsics that we use below act as full memory barriers, for
368 * both compiler and hardware reordering, except for __sync_lock_test_and_set,
369 * which is only an acquire barrier. When we call __sync_lock_test_and_set,
370 * we add a barrier above it as appropriate.
373 template<MemoryOrdering Order> struct Barrier;
376 * Some processors (in particular, x86) don't require quite so many calls to
377 * __sync_sychronize as our specializations of Barrier produce. If
378 * performance turns out to be an issue, defining these specializations
379 * on a per-processor basis would be a good first tuning step.
382 template<>
383 struct Barrier<Relaxed>
385 static void beforeLoad() {}
386 static void afterLoad() {}
387 static void beforeStore() {}
388 static void afterStore() {}
391 template<>
392 struct Barrier<ReleaseAcquire>
394 static void beforeLoad() {}
395 static void afterLoad() { __sync_synchronize(); }
396 static void beforeStore() { __sync_synchronize(); }
397 static void afterStore() {}
400 template<>
401 struct Barrier<SequentiallyConsistent>
403 static void beforeLoad() { __sync_synchronize(); }
404 static void afterLoad() { __sync_synchronize(); }
405 static void beforeStore() { __sync_synchronize(); }
406 static void afterStore() { __sync_synchronize(); }
409 template<typename T, MemoryOrdering Order>
410 struct IntrinsicMemoryOps
412 static T load(const T& aPtr)
414 Barrier<Order>::beforeLoad();
415 T val = aPtr;
416 Barrier<Order>::afterLoad();
417 return val;
420 static void store(T& aPtr, T aVal)
422 Barrier<Order>::beforeStore();
423 aPtr = aVal;
424 Barrier<Order>::afterStore();
427 static T exchange(T& aPtr, T aVal)
429 // __sync_lock_test_and_set is only an acquire barrier; loads and stores
430 // can't be moved up from after to before it, but they can be moved down
431 // from before to after it. We may want a stricter ordering, so we need
432 // an explicit barrier.
433 Barrier<Order>::beforeStore();
434 return __sync_lock_test_and_set(&aPtr, aVal);
437 static bool compareExchange(T& aPtr, T aOldVal, T aNewVal)
439 return __sync_bool_compare_and_swap(&aPtr, aOldVal, aNewVal);
/*
 * Atomic addition and subtraction using __sync_fetch_and_{add,sub}.  Both
 * return the value held before the operation.
 */
template<typename T>
struct IntrinsicAddSub
{
  typedef T ValueType;

  static T add(T& aPtr, T aVal)
  {
    T* const target = &aPtr;
    return __sync_fetch_and_add(target, aVal);
  }

  static T sub(T& aPtr, T aVal)
  {
    T* const target = &aPtr;
    return __sync_fetch_and_sub(target, aVal);
  }
};

/*
 * Pointer flavour.  The addend is an element count, but the __sync builtins
 * do not provide standard pointer-arithmetic semantics, so the count is
 * scaled to bytes by hand.  The reinterpret_cast to the pointer type is
 * there purely so the builtin type-checks.
 */
template<typename T>
struct IntrinsicAddSub<T*>
{
  typedef T* ValueType;

  static ValueType add(ValueType& aPtr, ptrdiff_t aVal)
  {
    ValueType byteDelta = reinterpret_cast<ValueType>(aVal * sizeof(T));
    return __sync_fetch_and_add(&aPtr, byteDelta);
  }

  static ValueType sub(ValueType& aPtr, ptrdiff_t aVal)
  {
    ValueType byteDelta = reinterpret_cast<ValueType>(aVal * sizeof(T));
    return __sync_fetch_and_sub(&aPtr, byteDelta);
  }
};
484 template<typename T>
485 struct IntrinsicIncDec : public IntrinsicAddSub<T>
487 static T inc(T& aPtr) { return IntrinsicAddSub<T>::add(aPtr, 1); }
488 static T dec(T& aPtr) { return IntrinsicAddSub<T>::sub(aPtr, 1); }
491 template<typename T, MemoryOrdering Order>
492 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
493 public IntrinsicIncDec<T>
495 static T or_( T& aPtr, T aVal) { return __sync_fetch_and_or(&aPtr, aVal); }
496 static T xor_(T& aPtr, T aVal) { return __sync_fetch_and_xor(&aPtr, aVal); }
497 static T and_(T& aPtr, T aVal) { return __sync_fetch_and_and(&aPtr, aVal); }
500 template<typename T, MemoryOrdering Order>
501 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
502 public IntrinsicIncDec<T*>
506 } // namespace detail
507 } // namespace mozilla
509 #elif defined(_MSC_VER)
512 * Windows comes with a full complement of atomic operations.
513 * Unfortunately, most of those aren't available for Windows XP (even if
514 * the compiler supports intrinsics for them), which is the oldest
515 * version of Windows we support. Therefore, we only provide operations
516 * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
517 * versions, we support 64-bit datatypes as well.
520 # include <intrin.h>
522 # pragma intrinsic(_InterlockedExchangeAdd)
523 # pragma intrinsic(_InterlockedOr)
524 # pragma intrinsic(_InterlockedXor)
525 # pragma intrinsic(_InterlockedAnd)
526 # pragma intrinsic(_InterlockedExchange)
527 # pragma intrinsic(_InterlockedCompareExchange)
529 namespace mozilla {
530 namespace detail {
532 # if !defined(_M_IX86) && !defined(_M_X64)
534 * The implementations below are optimized for x86ish systems. You
535 * will have to modify them if you are porting to Windows on a
536 * different architecture.
538 # error "Unknown CPU type"
539 # endif
542 * The PrimitiveIntrinsics template should define |Type|, the datatype of size
543 * DataSize upon which we operate, and the following eight functions.
545 * static Type add(Type* aPtr, Type aVal);
546 * static Type sub(Type* aPtr, Type aVal);
547 * static Type or_(Type* aPtr, Type aVal);
548 * static Type xor_(Type* aPtr, Type aVal);
549 * static Type and_(Type* aPtr, Type aVal);
551 * These functions perform the obvious operation on the value contained in
552 * |*aPtr| combined with |aVal| and return the value previously stored in
553 * |*aPtr|.
555 * static void store(Type* aPtr, Type aVal);
557 * This function atomically stores |aVal| into |*aPtr| and must provide a full
558 * memory fence after the store to prevent compiler and hardware instruction
559 * reordering. It should also act as a compiler barrier to prevent reads and
560 * writes from moving to after the store.
562 * static Type exchange(Type* aPtr, Type aVal);
564 * This function atomically stores |aVal| into |*aPtr| and returns the
565 * previous contents of |*aPtr|;
567 * static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal);
569 * This function atomically performs the following operation:
571 * if (*aPtr == aOldVal) {
572 * *aPtr = aNewVal;
573 * return true;
574 * } else {
575 * return false;
579 template<size_t DataSize> struct PrimitiveIntrinsics;
581 template<>
582 struct PrimitiveIntrinsics<4>
584 typedef long Type;
586 static Type add(Type* aPtr, Type aVal)
588 return _InterlockedExchangeAdd(aPtr, aVal);
591 static Type sub(Type* aPtr, Type aVal)
594 * _InterlockedExchangeSubtract isn't available before Windows 7,
595 * and we must support Windows XP.
597 return _InterlockedExchangeAdd(aPtr, -aVal);
600 static Type or_(Type* aPtr, Type aVal)
602 return _InterlockedOr(aPtr, aVal);
605 static Type xor_(Type* aPtr, Type aVal)
607 return _InterlockedXor(aPtr, aVal);
610 static Type and_(Type* aPtr, Type aVal)
612 return _InterlockedAnd(aPtr, aVal);
615 static void store(Type* aPtr, Type aVal)
617 _InterlockedExchange(aPtr, aVal);
620 static Type exchange(Type* aPtr, Type aVal)
622 return _InterlockedExchange(aPtr, aVal);
625 static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
627 return _InterlockedCompareExchange(aPtr, aNewVal, aOldVal) == aOldVal;
631 # if defined(_M_X64)
633 # pragma intrinsic(_InterlockedExchangeAdd64)
634 # pragma intrinsic(_InterlockedOr64)
635 # pragma intrinsic(_InterlockedXor64)
636 # pragma intrinsic(_InterlockedAnd64)
637 # pragma intrinsic(_InterlockedExchange64)
638 # pragma intrinsic(_InterlockedCompareExchange64)
640 template <>
641 struct PrimitiveIntrinsics<8>
643 typedef __int64 Type;
645 static Type add(Type* aPtr, Type aVal)
647 return _InterlockedExchangeAdd64(aPtr, aVal);
650 static Type sub(Type* aPtr, Type aVal)
653 * There is no _InterlockedExchangeSubtract64.
655 return _InterlockedExchangeAdd64(aPtr, -aVal);
658 static Type or_(Type* aPtr, Type aVal)
660 return _InterlockedOr64(aPtr, aVal);
663 static Type xor_(Type* aPtr, Type aVal)
665 return _InterlockedXor64(aPtr, aVal);
668 static Type and_(Type* aPtr, Type aVal)
670 return _InterlockedAnd64(aPtr, aVal);
673 static void store(Type* aPtr, Type aVal)
675 _InterlockedExchange64(aPtr, aVal);
678 static Type exchange(Type* aPtr, Type aVal)
680 return _InterlockedExchange64(aPtr, aVal);
683 static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
685 return _InterlockedCompareExchange64(aPtr, aNewVal, aOldVal) == aOldVal;
689 # endif
691 # pragma intrinsic(_ReadWriteBarrier)
693 template<MemoryOrdering Order> struct Barrier;
696 * We do not provide an afterStore method in Barrier, as Relaxed and
697 * ReleaseAcquire orderings do not require one, and the required barrier
698 * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
701 template<>
702 struct Barrier<Relaxed>
704 static void beforeLoad() {}
705 static void afterLoad() {}
706 static void beforeStore() {}
709 template<>
710 struct Barrier<ReleaseAcquire>
712 static void beforeLoad() {}
713 static void afterLoad() { _ReadWriteBarrier(); }
714 static void beforeStore() { _ReadWriteBarrier(); }
717 template<>
718 struct Barrier<SequentiallyConsistent>
720 static void beforeLoad() { _ReadWriteBarrier(); }
721 static void afterLoad() { _ReadWriteBarrier(); }
722 static void beforeStore() { _ReadWriteBarrier(); }
/*
 * Converts between a user-visible type T and the primitive type the
 * intrinsics operate on.  Non-pointer types convert with static_cast.
 */
template<typename PrimType, typename T>
struct CastHelper
{
  static PrimType toPrimType(T aVal)
  {
    return static_cast<PrimType>(aVal);
  }

  static T fromPrimType(PrimType aVal)
  {
    return static_cast<T>(aVal);
  }
};

// Pointers cannot be static_cast to or from an integer; reinterpret instead.
template<typename PrimType, typename T>
struct CastHelper<PrimType, T*>
{
  static PrimType toPrimType(T* aVal)
  {
    return reinterpret_cast<PrimType>(aVal);
  }

  static T* fromPrimType(PrimType aVal)
  {
    return reinterpret_cast<T*>(aVal);
  }
};
739 template<typename T>
740 struct IntrinsicBase
742 typedef T ValueType;
743 typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
744 typedef typename Primitives::Type PrimType;
745 static_assert(sizeof(PrimType) == sizeof(T),
746 "Selection of PrimitiveIntrinsics was wrong");
747 typedef CastHelper<PrimType, T> Cast;
750 template<typename T, MemoryOrdering Order>
751 struct IntrinsicMemoryOps : public IntrinsicBase<T>
753 typedef typename IntrinsicBase<T>::ValueType ValueType;
754 typedef typename IntrinsicBase<T>::Primitives Primitives;
755 typedef typename IntrinsicBase<T>::PrimType PrimType;
756 typedef typename IntrinsicBase<T>::Cast Cast;
758 static ValueType load(const ValueType& aPtr)
760 Barrier<Order>::beforeLoad();
761 ValueType val = aPtr;
762 Barrier<Order>::afterLoad();
763 return val;
766 static void store(ValueType& aPtr, ValueType aVal)
768 // For SequentiallyConsistent, Primitives::store() will generate the
769 // proper memory fence. Everything else just needs a barrier before
770 // the store.
771 if (Order == SequentiallyConsistent) {
772 Primitives::store(reinterpret_cast<PrimType*>(&aPtr),
773 Cast::toPrimType(aVal));
774 } else {
775 Barrier<Order>::beforeStore();
776 aPtr = aVal;
780 static ValueType exchange(ValueType& aPtr, ValueType aVal)
782 PrimType oldval =
783 Primitives::exchange(reinterpret_cast<PrimType*>(&aPtr),
784 Cast::toPrimType(aVal));
785 return Cast::fromPrimType(oldval);
788 static bool compareExchange(ValueType& aPtr, ValueType aOldVal,
789 ValueType aNewVal)
791 return Primitives::compareExchange(reinterpret_cast<PrimType*>(&aPtr),
792 Cast::toPrimType(aOldVal),
793 Cast::toPrimType(aNewVal));
797 template<typename T>
798 struct IntrinsicApplyHelper : public IntrinsicBase<T>
800 typedef typename IntrinsicBase<T>::ValueType ValueType;
801 typedef typename IntrinsicBase<T>::PrimType PrimType;
802 typedef typename IntrinsicBase<T>::Cast Cast;
803 typedef PrimType (*BinaryOp)(PrimType*, PrimType);
804 typedef PrimType (*UnaryOp)(PrimType*);
806 static ValueType applyBinaryFunction(BinaryOp aOp, ValueType& aPtr,
807 ValueType aVal)
809 PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
810 PrimType primTypeVal = Cast::toPrimType(aVal);
811 return Cast::fromPrimType(aOp(primTypePtr, primTypeVal));
814 static ValueType applyUnaryFunction(UnaryOp aOp, ValueType& aPtr)
816 PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
817 return Cast::fromPrimType(aOp(primTypePtr));
821 template<typename T>
822 struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
824 typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
825 typedef typename IntrinsicBase<T>::Primitives Primitives;
827 static ValueType add(ValueType& aPtr, ValueType aVal)
829 return applyBinaryFunction(&Primitives::add, aPtr, aVal);
832 static ValueType sub(ValueType& aPtr, ValueType aVal)
834 return applyBinaryFunction(&Primitives::sub, aPtr, aVal);
838 template<typename T>
839 struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
841 typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
842 typedef typename IntrinsicBase<T*>::Primitives Primitives;
844 static ValueType add(ValueType& aPtr, ptrdiff_t aAmount)
846 return applyBinaryFunction(&Primitives::add, aPtr,
847 (ValueType)(aAmount * sizeof(T)));
850 static ValueType sub(ValueType& aPtr, ptrdiff_t aAmount)
852 return applyBinaryFunction(&Primitives::sub, aPtr,
853 (ValueType)(aAmount * sizeof(T)));
857 template<typename T>
858 struct IntrinsicIncDec : public IntrinsicAddSub<T>
860 typedef typename IntrinsicAddSub<T>::ValueType ValueType;
861 static ValueType inc(ValueType& aPtr) { return add(aPtr, 1); }
862 static ValueType dec(ValueType& aPtr) { return sub(aPtr, 1); }
865 template<typename T, MemoryOrdering Order>
866 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
867 public IntrinsicIncDec<T>
869 typedef typename IntrinsicIncDec<T>::ValueType ValueType;
870 typedef typename IntrinsicBase<T>::Primitives Primitives;
872 static ValueType or_(ValueType& aPtr, T aVal)
874 return applyBinaryFunction(&Primitives::or_, aPtr, aVal);
877 static ValueType xor_(ValueType& aPtr, T aVal)
879 return applyBinaryFunction(&Primitives::xor_, aPtr, aVal);
882 static ValueType and_(ValueType& aPtr, T aVal)
884 return applyBinaryFunction(&Primitives::and_, aPtr, aVal);
888 template<typename T, MemoryOrdering Order>
889 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
890 public IntrinsicIncDec<T*>
892 typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
893 // This is required to make us be able to build with MSVC10, for unknown
894 // reasons.
895 typedef typename IntrinsicBase<T*>::Primitives Primitives;
898 } // namespace detail
899 } // namespace mozilla
901 #else
902 # error "Atomic compiler intrinsics are not supported on your platform"
903 #endif
905 namespace mozilla {
907 namespace detail {
909 template<typename T, MemoryOrdering Order>
910 class AtomicBase
912 // We only support 32-bit types on 32-bit Windows, which constrains our
913 // implementation elsewhere. But we support pointer-sized types everywhere.
914 static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
915 "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
917 protected:
918 typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
919 typename Intrinsics::ValueType mValue;
921 public:
922 MOZ_CONSTEXPR AtomicBase() : mValue() {}
923 explicit MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}
925 // Note: we can't provide operator T() here because Atomic<bool> inherits
926 // from AtomcBase with T=uint32_t and not T=bool. If we implemented
927 // operator T() here, it would cause errors when comparing Atomic<bool> with
928 // a regular bool.
930 T operator=(T aVal)
932 Intrinsics::store(mValue, aVal);
933 return aVal;
937 * Performs an atomic swap operation. aVal is stored and the previous
938 * value of this variable is returned.
940 T exchange(T aVal)
942 return Intrinsics::exchange(mValue, aVal);
946 * Performs an atomic compare-and-swap operation and returns true if it
947 * succeeded. This is equivalent to atomically doing
949 * if (mValue == aOldValue) {
950 * mValue = aNewValue;
951 * return true;
952 * } else {
953 * return false;
956 bool compareExchange(T aOldValue, T aNewValue)
958 return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
961 private:
962 template<MemoryOrdering AnyOrder>
963 AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
966 template<typename T, MemoryOrdering Order>
967 class AtomicBaseIncDec : public AtomicBase<T, Order>
969 typedef typename detail::AtomicBase<T, Order> Base;
971 public:
972 MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
973 explicit MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}
975 using Base::operator=;
977 operator T() const { return Base::Intrinsics::load(Base::mValue); }
978 T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
979 T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
980 T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
981 T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
983 private:
984 template<MemoryOrdering AnyOrder>
985 AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
988 } // namespace detail
991 * A wrapper for a type that enforces that all memory accesses are atomic.
993 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
994 * its place. Implementations for integral and pointer types are provided
995 * below.
997 * Atomic accesses are sequentially consistent by default. You should
998 * use the default unless you are tall enough to ride the
999 * memory-ordering roller coaster (if you're not sure, you aren't) and
1000 * you have a compelling reason to do otherwise.
1002 * There is one exception to the case of atomic memory accesses: providing an
1003 * initial value of the atomic value is not guaranteed to be atomic. This is a
1004 * deliberate design choice that enables static atomic variables to be declared
1005 * without introducing extra static constructors.
1007 template<typename T,
1008 MemoryOrdering Order = SequentiallyConsistent,
1009 typename Enable = void>
1010 class Atomic;
1013 * Atomic<T> implementation for integral types.
1015 * In addition to atomic store and load operations, compound assignment and
1016 * increment/decrement operators are implemented which perform the
1017 * corresponding read-modify-write operation atomically. Finally, an atomic
1018 * swap method is provided.
1020 template<typename T, MemoryOrdering Order>
1021 class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value &&
1022 !IsSame<T, bool>::value>::Type>
1023 : public detail::AtomicBaseIncDec<T, Order>
1025 typedef typename detail::AtomicBaseIncDec<T, Order> Base;
1027 public:
1028 MOZ_CONSTEXPR Atomic() : Base() {}
1029 explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1031 using Base::operator=;
1033 T operator+=(T aDelta)
1035 return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
1038 T operator-=(T aDelta)
1040 return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
1043 T operator|=(T aVal)
1045 return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
1048 T operator^=(T aVal)
1050 return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
1053 T operator&=(T aVal)
1055 return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
1058 private:
1059 Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1063 * Atomic<T> implementation for pointer types.
1065 * An atomic compare-and-swap primitive for pointer variables is provided, as
1066 * are atomic increment and decrement operators. Also provided are the compound
1067 * assignment operators for addition and subtraction. Atomic swap (via
1068 * exchange()) is included as well.
1070 template<typename T, MemoryOrdering Order>
1071 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
1073 typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
1075 public:
1076 MOZ_CONSTEXPR Atomic() : Base() {}
1077 explicit MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}
1079 using Base::operator=;
1081 T* operator+=(ptrdiff_t aDelta)
1083 return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
1086 T* operator-=(ptrdiff_t aDelta)
1088 return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
1091 private:
1092 Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
1096 * Atomic<T> implementation for enum types.
1098 * The atomic store and load operations and the atomic swap method are provided.
1100 template<typename T, MemoryOrdering Order>
1101 class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
1102 : public detail::AtomicBase<T, Order>
1104 typedef typename detail::AtomicBase<T, Order> Base;
1106 public:
1107 MOZ_CONSTEXPR Atomic() : Base() {}
1108 explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1110 operator T() const { return Base::Intrinsics::load(Base::mValue); }
1112 using Base::operator=;
1114 private:
1115 Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1119 * Atomic<T> implementation for boolean types.
1121 * The atomic store and load operations and the atomic swap method are provided.
1123 * Note:
1125 * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
1126 * bool and/or some implementations of std::atomic. This is allowed in
1127 * [atomic.types.generic]p9.
1129 * - It's not obvious whether the 8-bit atomic functions on Windows are always
1130 * inlined or not. If they are not inlined, the corresponding functions in the
1131 * runtime library are not available on Windows XP. This is why we implement
1132 * Atomic<bool> with an underlying type of uint32_t.
1134 template<MemoryOrdering Order>
1135 class Atomic<bool, Order>
1136 : protected detail::AtomicBase<uint32_t, Order>
1138 typedef typename detail::AtomicBase<uint32_t, Order> Base;
1140 public:
1141 MOZ_CONSTEXPR Atomic() : Base() {}
1142 explicit MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {}
1144 // We provide boolean wrappers for the underlying AtomicBase methods.
1145 operator bool() const
1147 return Base::Intrinsics::load(Base::mValue);
1150 bool operator=(bool aVal)
1152 return Base::operator=(aVal);
1155 bool exchange(bool aVal)
1157 return Base::exchange(aVal);
1160 bool compareExchange(bool aOldValue, bool aNewValue)
1162 return Base::compareExchange(aOldValue, aNewValue);
1165 private:
1166 Atomic(Atomic<bool, Order>& aOther) MOZ_DELETE;
1169 } // namespace mozilla
1171 #endif /* mozilla_Atomics_h */