Bug 1104435 part 2 - Make AnimationPlayer derive from nsISupports; r=smaug
[gecko.git] / mfbt / Atomics.h
blobc7f76014276fcf0157878ef296798dc68a643e4c
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /*
8 * Implements (almost always) lock-free atomic operations. The operations here
9 * are a subset of that which can be found in C++11's <atomic> header, with a
10 * different API to enforce consistent memory ordering constraints.
12 * Anyone caught using |volatile| for inter-thread memory safety needs to be
13 * sent a copy of this header and the C++11 standard.
16 #ifndef mozilla_Atomics_h
17 #define mozilla_Atomics_h
19 #include "mozilla/Assertions.h"
20 #include "mozilla/Attributes.h"
21 #include "mozilla/Compiler.h"
22 #include "mozilla/TypeTraits.h"
24 #include <stdint.h>
27 * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
28 * does not have <atomic>. So be sure to check for <atomic> support
29 * along with C++0x support.
31 #if defined(__clang__) || defined(__GNUC__)
33 * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
34 * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline
35 * definitions for unspecialized std::atomic and causes linking errors.
36 * Therefore, we require at least 4.7.0 for using libstdc++.
38 * libc++ <atomic> is only functional with clang.
40 # if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
41 # define MOZ_HAVE_CXX11_ATOMICS
42 # elif MOZ_USING_LIBCXX && defined(__clang__)
43 # define MOZ_HAVE_CXX11_ATOMICS
44 # endif
46 * Although Visual Studio 2012's CRT supports <atomic>, its atomic load
47 * implementation unnecessarily uses an atomic intrinsic for the less
48 * restrictive memory orderings, which can be prohibitively expensive.
49 * Therefore, we require at least Visual Studio 2013 for using the CRT
50 * (bug 1061764).
52 #elif defined(_MSC_VER) && _MSC_VER >= 1800
53 # if defined(DEBUG)
55 * Provide our own failure code since we're having trouble linking to
56 * std::_Debug_message (bug 982310).
58 # define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order")
59 # endif
60 # define MOZ_HAVE_CXX11_ATOMICS
61 #endif
63 namespace mozilla {
65 /**
66 * An enum of memory ordering possibilities for atomics.
68 * Memory ordering is the observable state of distinct values in memory.
69 * (It's a separate concept from atomicity, which concerns whether an
70 * operation can ever be observed in an intermediate state. Don't
71 * conflate the two!) Given a sequence of operations in source code on
72 * memory, it is *not* always the case that, at all times and on all
73 * cores, those operations will appear to have occurred in that exact
74 * sequence. First, the compiler might reorder that sequence, if it
75 * thinks another ordering will be more efficient. Second, the CPU may
76 * not expose so consistent a view of memory. CPUs will often perform
77 * their own instruction reordering, above and beyond that performed by
78 * the compiler. And each core has its own memory caches, and accesses
79 * (reads and writes both) to "memory" may only resolve to out-of-date
80 * cache entries -- not to the "most recently" performed operation in
81 * some global sense. Any access to a value that may be used by
82 * multiple threads, potentially across multiple cores, must therefore
83 * have a memory ordering imposed on it, for all code on all
84 * threads/cores to have a sufficiently coherent worldview.
86 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
87 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
88 * detail on all this, including examples of how each mode works.
90 * Note that for simplicity and practicality, not all of the modes in
91 * C++11 are supported. The missing C++11 modes are either subsumed by
92 * the modes we provide below, or not relevant for the CPUs we support
93 * in Gecko. These three modes are confusing enough as it is!
95 enum MemoryOrdering {
97 * Relaxed ordering is the simplest memory ordering: none at all.
98 * When the result of a write is observed, nothing may be inferred
99 * about other memory. Writes ostensibly performed "before" on the
100 * writing thread may not yet be visible. Writes performed "after" on
101 * the writing thread may already be visible, if the compiler or CPU
102 * reordered them. (The latter can happen if reads and/or writes get
103 * held up in per-processor caches.) Relaxed ordering means
104 * operations can always use cached values (as long as the actual
105 * updates to atomic values actually occur, correctly, eventually), so
106 * it's usually the fastest sort of atomic access. For this reason,
107 * *it's also the most dangerous kind of access*.
109 * Relaxed ordering is good for things like process-wide statistics
110 * counters that don't need to be consistent with anything else, so
111 * long as updates themselves are atomic. (And so long as any
112 * observations of that value can tolerate being out-of-date -- if you
113 * need some sort of up-to-date value, you need some sort of other
114 * synchronizing operation.) It's *not* good for locks, mutexes,
115 * reference counts, etc. that mediate access to other memory, or must
116 * be observably consistent with other memory.
118 * x86 architectures don't take advantage of the optimization
119 * opportunities that relaxed ordering permits. Thus it's possible
120 * that using relaxed ordering will "work" on x86 but fail elsewhere
121 * (ARM, say, which *does* implement non-sequentially-consistent
122 * relaxed ordering semantics). Be extra-careful using relaxed
123 * ordering if you can't easily test non-x86 architectures!
125 Relaxed,
128 * When an atomic value is updated with ReleaseAcquire ordering, and
129 * that new value is observed with ReleaseAcquire ordering, prior
130 * writes (atomic or not) are also observable. What ReleaseAcquire
131 * *doesn't* give you is any observable ordering guarantees for
132 * ReleaseAcquire-ordered operations on different objects. For
133 * example, if there are two cores that each perform ReleaseAcquire
134 * operations on separate objects, each core may or may not observe
135 * the operations made by the other core. The only way the cores can
136 * be synchronized with ReleaseAcquire is if they both
137 * ReleaseAcquire-access the same object. This implies that you can't
138 * necessarily describe some global total ordering of ReleaseAcquire
139 * operations.
141 * ReleaseAcquire ordering is good for (as the name implies) atomic
142 * operations on values controlling ownership of things: reference
143 * counts, mutexes, and the like. However, if you are thinking about
144 * using these to implement your own locks or mutexes, you should take
145 * a good, hard look at actual lock or mutex primitives first.
147 ReleaseAcquire,
150 * When an atomic value is updated with SequentiallyConsistent
151 * ordering, all writes are observable when the update is observed, just
152 * as with ReleaseAcquire ordering. But, furthermore, a global total
153 * ordering of SequentiallyConsistent operations *can* be described.
154 * For example, if two cores perform SequentiallyConsistent operations
155 * on separate objects, one core will observably perform its update
156 * (and all previous operations will have completed), then the other
157 * core will observably perform its update (and all previous
158 * operations will have completed). (Although those previous
159 * operations aren't themselves ordered -- they could be intermixed,
160 * or ordered if they occur on atomic values with ordering
161 * requirements.) SequentiallyConsistent is the *simplest and safest*
162 * ordering of atomic operations -- it's always as if one operation
163 * happens, then another, then another, in some order -- and every
164 * core observes updates to happen in that single order. Because it
165 * has the most synchronization requirements, operations ordered this
166 * way also tend to be slowest.
168 * SequentiallyConsistent ordering can be desirable when multiple
169 * threads observe objects, and they all have to agree on the
170 * observable order of changes to them. People expect
171 * SequentiallyConsistent ordering, even if they shouldn't, when
172 * writing code, atomic or otherwise. SequentiallyConsistent is also
173 * the ordering of choice when designing lockless data structures. If
174 * you don't know what order to use, use this one.
176 SequentiallyConsistent,
179 } // namespace mozilla
181 // Build up the underlying intrinsics.
182 #ifdef MOZ_HAVE_CXX11_ATOMICS
184 # include <atomic>
186 namespace mozilla {
187 namespace detail {
190 * We provide CompareExchangeFailureOrder to work around a bug in some
191 * versions of GCC's <atomic> header. See bug 898491.
193 template<MemoryOrdering Order> struct AtomicOrderConstraints;
195 template<>
196 struct AtomicOrderConstraints<Relaxed>
198 static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
199 static const std::memory_order LoadOrder = std::memory_order_relaxed;
200 static const std::memory_order StoreOrder = std::memory_order_relaxed;
201 static const std::memory_order CompareExchangeFailureOrder =
202 std::memory_order_relaxed;
205 template<>
206 struct AtomicOrderConstraints<ReleaseAcquire>
208 static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
209 static const std::memory_order LoadOrder = std::memory_order_acquire;
210 static const std::memory_order StoreOrder = std::memory_order_release;
211 static const std::memory_order CompareExchangeFailureOrder =
212 std::memory_order_acquire;
215 template<>
216 struct AtomicOrderConstraints<SequentiallyConsistent>
218 static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
219 static const std::memory_order LoadOrder = std::memory_order_seq_cst;
220 static const std::memory_order StoreOrder = std::memory_order_seq_cst;
221 static const std::memory_order CompareExchangeFailureOrder =
222 std::memory_order_seq_cst;
225 template<typename T, MemoryOrdering Order>
226 struct IntrinsicBase
228 typedef std::atomic<T> ValueType;
229 typedef AtomicOrderConstraints<Order> OrderedOp;
232 template<typename T, MemoryOrdering Order>
233 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
235 typedef IntrinsicBase<T, Order> Base;
237 static T load(const typename Base::ValueType& aPtr)
239 return aPtr.load(Base::OrderedOp::LoadOrder);
242 static void store(typename Base::ValueType& aPtr, T aVal)
244 aPtr.store(aVal, Base::OrderedOp::StoreOrder);
247 static T exchange(typename Base::ValueType& aPtr, T aVal)
249 return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
252 static bool compareExchange(typename Base::ValueType& aPtr,
253 T aOldVal, T aNewVal)
255 return aPtr.compare_exchange_strong(aOldVal, aNewVal,
256 Base::OrderedOp::AtomicRMWOrder,
257 Base::OrderedOp::CompareExchangeFailureOrder);
261 template<typename T, MemoryOrdering Order>
262 struct IntrinsicAddSub : public IntrinsicBase<T, Order>
264 typedef IntrinsicBase<T, Order> Base;
266 static T add(typename Base::ValueType& aPtr, T aVal)
268 return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
271 static T sub(typename Base::ValueType& aPtr, T aVal)
273 return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
277 template<typename T, MemoryOrdering Order>
278 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
280 typedef IntrinsicBase<T*, Order> Base;
282 static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal)
284 return aPtr.fetch_add(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
287 static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal)
289 return aPtr.fetch_sub(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder);
291 private:
293 * GCC 4.6's <atomic> header has a bug where adding X to an
294 * atomic<T*> is not the same as adding X to a T*. Hence the need
295 * for this function to provide the correct addend.
297 static ptrdiff_t fixupAddend(ptrdiff_t aVal)
299 #if defined(__clang__) || defined(_MSC_VER)
300 return aVal;
301 #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
302 !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
303 return aVal * sizeof(T);
304 #else
305 return aVal;
306 #endif
310 template<typename T, MemoryOrdering Order>
311 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
313 typedef IntrinsicBase<T, Order> Base;
315 static T inc(typename Base::ValueType& aPtr)
317 return IntrinsicAddSub<T, Order>::add(aPtr, 1);
320 static T dec(typename Base::ValueType& aPtr)
322 return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
326 template<typename T, MemoryOrdering Order>
327 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
328 public IntrinsicIncDec<T, Order>
330 typedef IntrinsicBase<T, Order> Base;
332 static T or_(typename Base::ValueType& aPtr, T aVal)
334 return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
337 static T xor_(typename Base::ValueType& aPtr, T aVal)
339 return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
342 static T and_(typename Base::ValueType& aPtr, T aVal)
344 return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
348 template<typename T, MemoryOrdering Order>
349 struct AtomicIntrinsics<T*, Order>
350 : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
354 } // namespace detail
355 } // namespace mozilla
357 #elif defined(__GNUC__)
359 namespace mozilla {
360 namespace detail {
363 * The __sync_* family of intrinsics is documented here:
365 * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
367 * While these intrinsics are deprecated in favor of the newer __atomic_*
368 * family of intrinsics:
370 * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
372 * any GCC version that supports the __atomic_* intrinsics will also support
373 * the <atomic> header and so will be handled above. We provide a version of
374 * atomics using the __sync_* intrinsics to support older versions of GCC.
376 * All __sync_* intrinsics that we use below act as full memory barriers, for
377 * both compiler and hardware reordering, except for __sync_lock_test_and_set,
378 * which is only an acquire barrier. When we call __sync_lock_test_and_set,
379 * we add a barrier above it as appropriate.
382 template<MemoryOrdering Order> struct Barrier;
385 * Some processors (in particular, x86) don't require quite so many calls to
386 * __sync_synchronize as our specializations of Barrier produce. If
387 * performance turns out to be an issue, defining these specializations
388 * on a per-processor basis would be a good first tuning step.
391 template<>
392 struct Barrier<Relaxed>
394 static void beforeLoad() {}
395 static void afterLoad() {}
396 static void beforeStore() {}
397 static void afterStore() {}
400 template<>
401 struct Barrier<ReleaseAcquire>
403 static void beforeLoad() {}
404 static void afterLoad() { __sync_synchronize(); }
405 static void beforeStore() { __sync_synchronize(); }
406 static void afterStore() {}
409 template<>
410 struct Barrier<SequentiallyConsistent>
412 static void beforeLoad() { __sync_synchronize(); }
413 static void afterLoad() { __sync_synchronize(); }
414 static void beforeStore() { __sync_synchronize(); }
415 static void afterStore() { __sync_synchronize(); }
418 template<typename T, MemoryOrdering Order>
419 struct IntrinsicMemoryOps
421 static T load(const T& aPtr)
423 Barrier<Order>::beforeLoad();
424 T val = aPtr;
425 Barrier<Order>::afterLoad();
426 return val;
429 static void store(T& aPtr, T aVal)
431 Barrier<Order>::beforeStore();
432 aPtr = aVal;
433 Barrier<Order>::afterStore();
436 static T exchange(T& aPtr, T aVal)
438 // __sync_lock_test_and_set is only an acquire barrier; loads and stores
439 // can't be moved up from after to before it, but they can be moved down
440 // from before to after it. We may want a stricter ordering, so we need
441 // an explicit barrier.
442 Barrier<Order>::beforeStore();
443 return __sync_lock_test_and_set(&aPtr, aVal);
446 static bool compareExchange(T& aPtr, T aOldVal, T aNewVal)
448 return __sync_bool_compare_and_swap(&aPtr, aOldVal, aNewVal);
/**
 * Atomic fetch-and-add / fetch-and-subtract for non-pointer types, using the
 * __sync_* builtins.  Both functions return the value |aPtr| held before the
 * operation.
 */
template<typename T>
struct IntrinsicAddSub
{
  typedef T ValueType;

  /** Atomically adds |aVal| to |aPtr|. */
  static T add(T& aPtr, T aVal)
  {
    T* const target = &aPtr;
    return __sync_fetch_and_add(target, aVal);
  }

  /** Atomically subtracts |aVal| from |aPtr|. */
  static T sub(T& aPtr, T aVal)
  {
    T* const target = &aPtr;
    return __sync_fetch_and_sub(target, aVal);
  }
};
468 template<typename T>
469 struct IntrinsicAddSub<T*>
471 typedef T* ValueType;
474 * The reinterpret_casts are needed so that
475 * __sync_fetch_and_{add,sub} will properly type-check.
477 * Also, these functions do not provide standard semantics for
478 * pointer types, so we need to adjust the addend.
480 static ValueType add(ValueType& aPtr, ptrdiff_t aVal)
482 ValueType amount = reinterpret_cast<ValueType>(aVal * sizeof(T));
483 return __sync_fetch_and_add(&aPtr, amount);
486 static ValueType sub(ValueType& aPtr, ptrdiff_t aVal)
488 ValueType amount = reinterpret_cast<ValueType>(aVal * sizeof(T));
489 return __sync_fetch_and_sub(&aPtr, amount);
493 template<typename T>
494 struct IntrinsicIncDec : public IntrinsicAddSub<T>
496 static T inc(T& aPtr) { return IntrinsicAddSub<T>::add(aPtr, 1); }
497 static T dec(T& aPtr) { return IntrinsicAddSub<T>::sub(aPtr, 1); }
500 template<typename T, MemoryOrdering Order>
501 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
502 public IntrinsicIncDec<T>
504 static T or_( T& aPtr, T aVal) { return __sync_fetch_and_or(&aPtr, aVal); }
505 static T xor_(T& aPtr, T aVal) { return __sync_fetch_and_xor(&aPtr, aVal); }
506 static T and_(T& aPtr, T aVal) { return __sync_fetch_and_and(&aPtr, aVal); }
509 template<typename T, MemoryOrdering Order>
510 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
511 public IntrinsicIncDec<T*>
515 } // namespace detail
516 } // namespace mozilla
518 #elif defined(_MSC_VER)
521 * Windows comes with a full complement of atomic operations.
522 * Unfortunately, most of those aren't available for Windows XP (even if
523 * the compiler supports intrinsics for them), which is the oldest
524 * version of Windows we support. Therefore, we only provide operations
525 * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
526 * versions, we support 64-bit datatypes as well.
529 # include <intrin.h>
531 # pragma intrinsic(_InterlockedExchangeAdd)
532 # pragma intrinsic(_InterlockedOr)
533 # pragma intrinsic(_InterlockedXor)
534 # pragma intrinsic(_InterlockedAnd)
535 # pragma intrinsic(_InterlockedExchange)
536 # pragma intrinsic(_InterlockedCompareExchange)
538 namespace mozilla {
539 namespace detail {
541 # if !defined(_M_IX86) && !defined(_M_X64)
543 * The implementations below are optimized for x86ish systems. You
544 * will have to modify them if you are porting to Windows on a
545 * different architecture.
547 # error "Unknown CPU type"
548 # endif
551 * The PrimitiveIntrinsics template should define |Type|, the datatype of size
552 * DataSize upon which we operate, and the following eight functions.
554 * static Type add(Type* aPtr, Type aVal);
555 * static Type sub(Type* aPtr, Type aVal);
556 * static Type or_(Type* aPtr, Type aVal);
557 * static Type xor_(Type* aPtr, Type aVal);
558 * static Type and_(Type* aPtr, Type aVal);
560 * These functions perform the obvious operation on the value contained in
561 * |*aPtr| combined with |aVal| and return the value previously stored in
562 * |*aPtr|.
564 * static void store(Type* aPtr, Type aVal);
566 * This function atomically stores |aVal| into |*aPtr| and must provide a full
567 * memory fence after the store to prevent compiler and hardware instruction
568 * reordering. It should also act as a compiler barrier to prevent reads and
569 * writes from moving to after the store.
571 * static Type exchange(Type* aPtr, Type aVal);
573 * This function atomically stores |aVal| into |*aPtr| and returns the
574 * previous contents of |*aPtr|.
576 * static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal);
578 * This function atomically performs the following operation:
580 * if (*aPtr == aOldVal) {
581 * *aPtr = aNewVal;
582 * return true;
583 * } else {
584 * return false;
588 template<size_t DataSize> struct PrimitiveIntrinsics;
590 template<>
591 struct PrimitiveIntrinsics<4>
593 typedef long Type;
595 static Type add(Type* aPtr, Type aVal)
597 return _InterlockedExchangeAdd(aPtr, aVal);
600 static Type sub(Type* aPtr, Type aVal)
603 * _InterlockedExchangeSubtract isn't available before Windows 7,
604 * and we must support Windows XP.
606 return _InterlockedExchangeAdd(aPtr, -aVal);
609 static Type or_(Type* aPtr, Type aVal)
611 return _InterlockedOr(aPtr, aVal);
614 static Type xor_(Type* aPtr, Type aVal)
616 return _InterlockedXor(aPtr, aVal);
619 static Type and_(Type* aPtr, Type aVal)
621 return _InterlockedAnd(aPtr, aVal);
624 static void store(Type* aPtr, Type aVal)
626 _InterlockedExchange(aPtr, aVal);
629 static Type exchange(Type* aPtr, Type aVal)
631 return _InterlockedExchange(aPtr, aVal);
634 static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
636 return _InterlockedCompareExchange(aPtr, aNewVal, aOldVal) == aOldVal;
640 # if defined(_M_X64)
642 # pragma intrinsic(_InterlockedExchangeAdd64)
643 # pragma intrinsic(_InterlockedOr64)
644 # pragma intrinsic(_InterlockedXor64)
645 # pragma intrinsic(_InterlockedAnd64)
646 # pragma intrinsic(_InterlockedExchange64)
647 # pragma intrinsic(_InterlockedCompareExchange64)
649 template <>
650 struct PrimitiveIntrinsics<8>
652 typedef __int64 Type;
654 static Type add(Type* aPtr, Type aVal)
656 return _InterlockedExchangeAdd64(aPtr, aVal);
659 static Type sub(Type* aPtr, Type aVal)
662 * There is no _InterlockedExchangeSubtract64.
664 return _InterlockedExchangeAdd64(aPtr, -aVal);
667 static Type or_(Type* aPtr, Type aVal)
669 return _InterlockedOr64(aPtr, aVal);
672 static Type xor_(Type* aPtr, Type aVal)
674 return _InterlockedXor64(aPtr, aVal);
677 static Type and_(Type* aPtr, Type aVal)
679 return _InterlockedAnd64(aPtr, aVal);
682 static void store(Type* aPtr, Type aVal)
684 _InterlockedExchange64(aPtr, aVal);
687 static Type exchange(Type* aPtr, Type aVal)
689 return _InterlockedExchange64(aPtr, aVal);
692 static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal)
694 return _InterlockedCompareExchange64(aPtr, aNewVal, aOldVal) == aOldVal;
698 # endif
700 # pragma intrinsic(_ReadWriteBarrier)
702 template<MemoryOrdering Order> struct Barrier;
705 * We do not provide an afterStore method in Barrier, as Relaxed and
706 * ReleaseAcquire orderings do not require one, and the required barrier
707 * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
710 template<>
711 struct Barrier<Relaxed>
713 static void beforeLoad() {}
714 static void afterLoad() {}
715 static void beforeStore() {}
718 template<>
719 struct Barrier<ReleaseAcquire>
721 static void beforeLoad() {}
722 static void afterLoad() { _ReadWriteBarrier(); }
723 static void beforeStore() { _ReadWriteBarrier(); }
726 template<>
727 struct Barrier<SequentiallyConsistent>
729 static void beforeLoad() { _ReadWriteBarrier(); }
730 static void afterLoad() { _ReadWriteBarrier(); }
731 static void beforeStore() { _ReadWriteBarrier(); }
/**
 * Converts between a non-pointer value type |T| and the primitive type the
 * intrinsics operate on, using static_cast in both directions.
 */
template<typename PrimType, typename T>
struct CastHelper
{
  static PrimType toPrimType(T aVal)
  {
    return static_cast<PrimType>(aVal);
  }

  static T fromPrimType(PrimType aVal)
  {
    return static_cast<T>(aVal);
  }
};
741 template<typename PrimType, typename T>
742 struct CastHelper<PrimType, T*>
744 static PrimType toPrimType(T* aVal) { return reinterpret_cast<PrimType>(aVal); }
745 static T* fromPrimType(PrimType aVal) { return reinterpret_cast<T*>(aVal); }
748 template<typename T>
749 struct IntrinsicBase
751 typedef T ValueType;
752 typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
753 typedef typename Primitives::Type PrimType;
754 static_assert(sizeof(PrimType) == sizeof(T),
755 "Selection of PrimitiveIntrinsics was wrong");
756 typedef CastHelper<PrimType, T> Cast;
759 template<typename T, MemoryOrdering Order>
760 struct IntrinsicMemoryOps : public IntrinsicBase<T>
762 typedef typename IntrinsicBase<T>::ValueType ValueType;
763 typedef typename IntrinsicBase<T>::Primitives Primitives;
764 typedef typename IntrinsicBase<T>::PrimType PrimType;
765 typedef typename IntrinsicBase<T>::Cast Cast;
767 static ValueType load(const ValueType& aPtr)
769 Barrier<Order>::beforeLoad();
770 ValueType val = aPtr;
771 Barrier<Order>::afterLoad();
772 return val;
775 static void store(ValueType& aPtr, ValueType aVal)
777 // For SequentiallyConsistent, Primitives::store() will generate the
778 // proper memory fence. Everything else just needs a barrier before
779 // the store.
780 if (Order == SequentiallyConsistent) {
781 Primitives::store(reinterpret_cast<PrimType*>(&aPtr),
782 Cast::toPrimType(aVal));
783 } else {
784 Barrier<Order>::beforeStore();
785 aPtr = aVal;
789 static ValueType exchange(ValueType& aPtr, ValueType aVal)
791 PrimType oldval =
792 Primitives::exchange(reinterpret_cast<PrimType*>(&aPtr),
793 Cast::toPrimType(aVal));
794 return Cast::fromPrimType(oldval);
797 static bool compareExchange(ValueType& aPtr, ValueType aOldVal,
798 ValueType aNewVal)
800 return Primitives::compareExchange(reinterpret_cast<PrimType*>(&aPtr),
801 Cast::toPrimType(aOldVal),
802 Cast::toPrimType(aNewVal));
806 template<typename T>
807 struct IntrinsicApplyHelper : public IntrinsicBase<T>
809 typedef typename IntrinsicBase<T>::ValueType ValueType;
810 typedef typename IntrinsicBase<T>::PrimType PrimType;
811 typedef typename IntrinsicBase<T>::Cast Cast;
812 typedef PrimType (*BinaryOp)(PrimType*, PrimType);
813 typedef PrimType (*UnaryOp)(PrimType*);
815 static ValueType applyBinaryFunction(BinaryOp aOp, ValueType& aPtr,
816 ValueType aVal)
818 PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
819 PrimType primTypeVal = Cast::toPrimType(aVal);
820 return Cast::fromPrimType(aOp(primTypePtr, primTypeVal));
823 static ValueType applyUnaryFunction(UnaryOp aOp, ValueType& aPtr)
825 PrimType* primTypePtr = reinterpret_cast<PrimType*>(&aPtr);
826 return Cast::fromPrimType(aOp(primTypePtr));
830 template<typename T>
831 struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
833 typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
834 typedef typename IntrinsicBase<T>::Primitives Primitives;
836 static ValueType add(ValueType& aPtr, ValueType aVal)
838 return applyBinaryFunction(&Primitives::add, aPtr, aVal);
841 static ValueType sub(ValueType& aPtr, ValueType aVal)
843 return applyBinaryFunction(&Primitives::sub, aPtr, aVal);
847 template<typename T>
848 struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
850 typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
851 typedef typename IntrinsicBase<T*>::Primitives Primitives;
853 static ValueType add(ValueType& aPtr, ptrdiff_t aAmount)
855 return applyBinaryFunction(&Primitives::add, aPtr,
856 (ValueType)(aAmount * sizeof(T)));
859 static ValueType sub(ValueType& aPtr, ptrdiff_t aAmount)
861 return applyBinaryFunction(&Primitives::sub, aPtr,
862 (ValueType)(aAmount * sizeof(T)));
866 template<typename T>
867 struct IntrinsicIncDec : public IntrinsicAddSub<T>
869 typedef typename IntrinsicAddSub<T>::ValueType ValueType;
870 static ValueType inc(ValueType& aPtr) { return add(aPtr, 1); }
871 static ValueType dec(ValueType& aPtr) { return sub(aPtr, 1); }
874 template<typename T, MemoryOrdering Order>
875 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
876 public IntrinsicIncDec<T>
878 typedef typename IntrinsicIncDec<T>::ValueType ValueType;
879 typedef typename IntrinsicBase<T>::Primitives Primitives;
881 static ValueType or_(ValueType& aPtr, T aVal)
883 return applyBinaryFunction(&Primitives::or_, aPtr, aVal);
886 static ValueType xor_(ValueType& aPtr, T aVal)
888 return applyBinaryFunction(&Primitives::xor_, aPtr, aVal);
891 static ValueType and_(ValueType& aPtr, T aVal)
893 return applyBinaryFunction(&Primitives::and_, aPtr, aVal);
897 template<typename T, MemoryOrdering Order>
898 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
899 public IntrinsicIncDec<T*>
901 typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
902 // This is required to make us be able to build with MSVC10, for unknown
903 // reasons.
904 typedef typename IntrinsicBase<T*>::Primitives Primitives;
907 } // namespace detail
908 } // namespace mozilla
910 #else
911 # error "Atomic compiler intrinsics are not supported on your platform"
912 #endif
914 namespace mozilla {
916 namespace detail {
918 template<typename T, MemoryOrdering Order>
919 class AtomicBase
921 // We only support 32-bit types on 32-bit Windows, which constrains our
922 // implementation elsewhere. But we support pointer-sized types everywhere.
923 static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
924 "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
926 protected:
927 typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
928 typename Intrinsics::ValueType mValue;
930 public:
931 MOZ_CONSTEXPR AtomicBase() : mValue() {}
932 explicit MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}
934 // Note: we can't provide operator T() here because Atomic<bool> inherits
935 // from AtomcBase with T=uint32_t and not T=bool. If we implemented
936 // operator T() here, it would cause errors when comparing Atomic<bool> with
937 // a regular bool.
939 T operator=(T aVal)
941 Intrinsics::store(mValue, aVal);
942 return aVal;
946 * Performs an atomic swap operation. aVal is stored and the previous
947 * value of this variable is returned.
949 T exchange(T aVal)
951 return Intrinsics::exchange(mValue, aVal);
955 * Performs an atomic compare-and-swap operation and returns true if it
956 * succeeded. This is equivalent to atomically doing
958 * if (mValue == aOldValue) {
959 * mValue = aNewValue;
960 * return true;
961 * } else {
962 * return false;
965 bool compareExchange(T aOldValue, T aNewValue)
967 return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
970 private:
971 template<MemoryOrdering AnyOrder>
972 AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
975 template<typename T, MemoryOrdering Order>
976 class AtomicBaseIncDec : public AtomicBase<T, Order>
978 typedef typename detail::AtomicBase<T, Order> Base;
980 public:
981 MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
982 explicit MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}
984 using Base::operator=;
986 operator T() const { return Base::Intrinsics::load(Base::mValue); }
987 T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
988 T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
989 T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
990 T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
992 private:
993 template<MemoryOrdering AnyOrder>
994 AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
997 } // namespace detail
1000 * A wrapper for a type that enforces that all memory accesses are atomic.
1002 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
1003 * its place. Implementations for integral and pointer types are provided
1004 * below.
1006 * Atomic accesses are sequentially consistent by default. You should
1007 * use the default unless you are tall enough to ride the
1008 * memory-ordering roller coaster (if you're not sure, you aren't) and
1009 * you have a compelling reason to do otherwise.
1011 * There is one exception to the case of atomic memory accesses: providing an
1012 * initial value of the atomic value is not guaranteed to be atomic. This is a
1013 * deliberate design choice that enables static atomic variables to be declared
1014 * without introducing extra static constructors.
1016 template<typename T,
1017 MemoryOrdering Order = SequentiallyConsistent,
1018 typename Enable = void>
1019 class Atomic;
1022 * Atomic<T> implementation for integral types.
1024 * In addition to atomic store and load operations, compound assignment and
1025 * increment/decrement operators are implemented which perform the
1026 * corresponding read-modify-write operation atomically. Finally, an atomic
1027 * swap method is provided.
1029 template<typename T, MemoryOrdering Order>
1030 class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value &&
1031 !IsSame<T, bool>::value>::Type>
1032 : public detail::AtomicBaseIncDec<T, Order>
1034 typedef typename detail::AtomicBaseIncDec<T, Order> Base;
1036 public:
1037 MOZ_CONSTEXPR Atomic() : Base() {}
1038 explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1040 using Base::operator=;
1042 T operator+=(T aDelta)
1044 return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
1047 T operator-=(T aDelta)
1049 return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
1052 T operator|=(T aVal)
1054 return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
1057 T operator^=(T aVal)
1059 return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
1062 T operator&=(T aVal)
1064 return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
1067 private:
1068 Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1072 * Atomic<T> implementation for pointer types.
1074 * An atomic compare-and-swap primitive for pointer variables is provided, as
1075 * are atomic increment and decement operators. Also provided are the compound
1076 * assignment operators for addition and subtraction. Atomic swap (via
1077 * exchange()) is included as well.
1079 template<typename T, MemoryOrdering Order>
1080 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
1082 typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
1084 public:
1085 MOZ_CONSTEXPR Atomic() : Base() {}
1086 explicit MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}
1088 using Base::operator=;
1090 T* operator+=(ptrdiff_t aDelta)
1092 return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
1095 T* operator-=(ptrdiff_t aDelta)
1097 return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
1100 private:
1101 Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
1105 * Atomic<T> implementation for enum types.
1107 * The atomic store and load operations and the atomic swap method is provided.
1109 template<typename T, MemoryOrdering Order>
1110 class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
1111 : public detail::AtomicBase<T, Order>
1113 typedef typename detail::AtomicBase<T, Order> Base;
1115 public:
1116 MOZ_CONSTEXPR Atomic() : Base() {}
1117 explicit MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
1119 operator T() const { return Base::Intrinsics::load(Base::mValue); }
1121 using Base::operator=;
1123 private:
1124 Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
1128 * Atomic<T> implementation for boolean types.
1130 * The atomic store and load operations and the atomic swap method is provided.
1132 * Note:
1134 * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
1135 * bool and/or some implementations of std::atomic. This is allowed in
1136 * [atomic.types.generic]p9.
1138 * - It's not obvious whether the 8-bit atomic functions on Windows are always
1139 * inlined or not. If they are not inlined, the corresponding functions in the
1140 * runtime library are not available on Windows XP. This is why we implement
1141 * Atomic<bool> with an underlying type of uint32_t.
1143 template<MemoryOrdering Order>
1144 class Atomic<bool, Order>
1145 : protected detail::AtomicBase<uint32_t, Order>
1147 typedef typename detail::AtomicBase<uint32_t, Order> Base;
1149 public:
1150 MOZ_CONSTEXPR Atomic() : Base() {}
1151 explicit MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {}
1153 // We provide boolean wrappers for the underlying AtomicBase methods.
1154 operator bool() const
1156 return Base::Intrinsics::load(Base::mValue);
1159 bool operator=(bool aVal)
1161 return Base::operator=(aVal);
1164 bool exchange(bool aVal)
1166 return Base::exchange(aVal);
1169 bool compareExchange(bool aOldValue, bool aNewValue)
1171 return Base::compareExchange(aOldValue, aNewValue);
1174 private:
1175 Atomic(Atomic<bool, Order>& aOther) MOZ_DELETE;
1178 } // namespace mozilla
1180 #endif /* mozilla_Atomics_h */