1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 * Implements (almost always) lock-free atomic operations. The operations here
9 * are a subset of that which can be found in C++11's <atomic> header, with a
10 * different API to enforce consistent memory ordering constraints.
12 * Anyone caught using |volatile| for inter-thread memory safety needs to be
13 * sent a copy of this header and the C++11 standard.
16 #ifndef mozilla_Atomics_h
17 #define mozilla_Atomics_h
19 #include "mozilla/Assertions.h"
20 #include "mozilla/Attributes.h"
21 #include "mozilla/Compiler.h"
22 #include "mozilla/TypeTraits.h"
27 * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
28 * does not have <atomic>. So be sure to check for <atomic> support
29 * along with C++0x support.
31 #if defined(__clang__) || defined(__GNUC__)
33 * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
34 * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline
35 * definitions for unspecialized std::atomic and cause linking errors.
36 * Therefore, we require at least 4.7.0 for using libstdc++.
38 * libc++ <atomic> is only functional with clang.
40 # if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
41 # define MOZ_HAVE_CXX11_ATOMICS
42 # elif MOZ_USING_LIBCXX && defined(__clang__)
43 # define MOZ_HAVE_CXX11_ATOMICS
45 #elif defined(_MSC_VER)
46 # define MOZ_HAVE_CXX11_ATOMICS
52 * An enum of memory ordering possibilities for atomics.
54 * Memory ordering is the observable state of distinct values in memory.
55 * (It's a separate concept from atomicity, which concerns whether an
56 * operation can ever be observed in an intermediate state. Don't
57 * conflate the two!) Given a sequence of operations in source code on
58 * memory, it is *not* always the case that, at all times and on all
59 * cores, those operations will appear to have occurred in that exact
60 * sequence. First, the compiler might reorder that sequence, if it
61 * thinks another ordering will be more efficient. Second, the CPU may
62 * not expose so consistent a view of memory. CPUs will often perform
63 * their own instruction reordering, above and beyond that performed by
64 * the compiler. And each core has its own memory caches, and accesses
65 * (reads and writes both) to "memory" may only resolve to out-of-date
66 * cache entries -- not to the "most recently" performed operation in
67 * some global sense. Any access to a value that may be used by
68 * multiple threads, potentially across multiple cores, must therefore
69 * have a memory ordering imposed on it, for all code on all
70 * threads/cores to have a sufficiently coherent worldview.
72 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
73 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
74 * detail on all this, including examples of how each mode works.
76 * Note that for simplicity and practicality, not all of the modes in
77 * C++11 are supported. The missing C++11 modes are either subsumed by
78 * the modes we provide below, or not relevant for the CPUs we support
79 * in Gecko. These three modes are confusing enough as it is!
83 * Relaxed ordering is the simplest memory ordering: none at all.
84 * When the result of a write is observed, nothing may be inferred
85 * about other memory. Writes ostensibly performed "before" on the
86 * writing thread may not yet be visible. Writes performed "after" on
87 * the writing thread may already be visible, if the compiler or CPU
88 * reordered them. (The latter can happen if reads and/or writes get
89 * held up in per-processor caches.) Relaxed ordering means
90 * operations can always use cached values (as long as the actual
91 * updates to atomic values actually occur, correctly, eventually), so
92 * it's usually the fastest sort of atomic access. For this reason,
93 * *it's also the most dangerous kind of access*.
95 * Relaxed ordering is good for things like process-wide statistics
96 * counters that don't need to be consistent with anything else, so
97 * long as updates themselves are atomic. (And so long as any
98 * observations of that value can tolerate being out-of-date -- if you
99 * need some sort of up-to-date value, you need some sort of other
100 * synchronizing operation.) It's *not* good for locks, mutexes,
101 * reference counts, etc. that mediate access to other memory, or must
102 * be observably consistent with other memory.
104 * x86 architectures don't take advantage of the optimization
105 * opportunities that relaxed ordering permits. Thus it's possible
106 * that using relaxed ordering will "work" on x86 but fail elsewhere
107 * (ARM, say, which *does* implement non-sequentially-consistent
108 * relaxed ordering semantics). Be extra-careful using relaxed
109 * ordering if you can't easily test non-x86 architectures!
114 * When an atomic value is updated with ReleaseAcquire ordering, and
115 * that new value is observed with ReleaseAcquire ordering, prior
116 * writes (atomic or not) are also observable. What ReleaseAcquire
117 * *doesn't* give you is any observable ordering guarantees for
118 * ReleaseAcquire-ordered operations on different objects. For
119 * example, if there are two cores that each perform ReleaseAcquire
120 * operations on separate objects, each core may or may not observe
121 * the operations made by the other core. The only way the cores can
122 * be synchronized with ReleaseAcquire is if they both
123 * ReleaseAcquire-access the same object. This implies that you can't
124 * necessarily describe some global total ordering of ReleaseAcquire operations.
127 * ReleaseAcquire ordering is good for (as the name implies) atomic
128 * operations on values controlling ownership of things: reference
129 * counts, mutexes, and the like. However, if you are thinking about
130 * using these to implement your own locks or mutexes, you should take
131 * a good, hard look at actual lock or mutex primitives first.
136 * When an atomic value is updated with SequentiallyConsistent
137 * ordering, all writes are observable when the update is observed, just
138 * as with ReleaseAcquire ordering. But, furthermore, a global total
139 * ordering of SequentiallyConsistent operations *can* be described.
140 * For example, if two cores perform SequentiallyConsistent operations
141 * on separate objects, one core will observably perform its update
142 * (and all previous operations will have completed), then the other
143 * core will observably perform its update (and all previous
144 * operations will have completed). (Although those previous
145 * operations aren't themselves ordered -- they could be intermixed,
146 * or ordered if they occur on atomic values with ordering
147 * requirements.) SequentiallyConsistent is the *simplest and safest*
148 * ordering of atomic operations -- it's always as if one operation
149 * happens, then another, then another, in some order -- and every
150 * core observes updates to happen in that single order. Because it
151 * has the most synchronization requirements, operations ordered this
152 * way also tend to be slowest.
154 * SequentiallyConsistent ordering can be desirable when multiple
155 * threads observe objects, and they all have to agree on the
156 * observable order of changes to them. People expect
157 * SequentiallyConsistent ordering, even if they shouldn't, when
158 * writing code, atomic or otherwise. SequentiallyConsistent is also
159 * the ordering of choice when designing lockless data structures. If
160 * you don't know what order to use, use this one.
162 SequentiallyConsistent
,
165 } // namespace mozilla
167 // Build up the underlying intrinsics.
168 #ifdef MOZ_HAVE_CXX11_ATOMICS
176 * We provide CompareExchangeFailureOrder to work around a bug in some
177 * versions of GCC's <atomic> header. See bug 898491.
179 template<MemoryOrdering Order
> struct AtomicOrderConstraints
;
182 struct AtomicOrderConstraints
<Relaxed
>
184 static const std::memory_order AtomicRMWOrder
= std::memory_order_relaxed
;
185 static const std::memory_order LoadOrder
= std::memory_order_relaxed
;
186 static const std::memory_order StoreOrder
= std::memory_order_relaxed
;
187 static const std::memory_order CompareExchangeFailureOrder
=
188 std::memory_order_relaxed
;
192 struct AtomicOrderConstraints
<ReleaseAcquire
>
194 static const std::memory_order AtomicRMWOrder
= std::memory_order_acq_rel
;
195 static const std::memory_order LoadOrder
= std::memory_order_acquire
;
196 static const std::memory_order StoreOrder
= std::memory_order_release
;
197 static const std::memory_order CompareExchangeFailureOrder
=
198 std::memory_order_acquire
;
202 struct AtomicOrderConstraints
<SequentiallyConsistent
>
204 static const std::memory_order AtomicRMWOrder
= std::memory_order_seq_cst
;
205 static const std::memory_order LoadOrder
= std::memory_order_seq_cst
;
206 static const std::memory_order StoreOrder
= std::memory_order_seq_cst
;
207 static const std::memory_order CompareExchangeFailureOrder
=
208 std::memory_order_seq_cst
;
211 template<typename T
, MemoryOrdering Order
>
214 typedef std::atomic
<T
> ValueType
;
215 typedef AtomicOrderConstraints
<Order
> OrderedOp
;
218 template<typename T
, MemoryOrdering Order
>
219 struct IntrinsicMemoryOps
: public IntrinsicBase
<T
, Order
>
221 typedef IntrinsicBase
<T
, Order
> Base
;
223 static T
load(const typename
Base::ValueType
& aPtr
)
225 return aPtr
.load(Base::OrderedOp::LoadOrder
);
228 static void store(typename
Base::ValueType
& aPtr
, T aVal
)
230 aPtr
.store(aVal
, Base::OrderedOp::StoreOrder
);
233 static T
exchange(typename
Base::ValueType
& aPtr
, T aVal
)
235 return aPtr
.exchange(aVal
, Base::OrderedOp::AtomicRMWOrder
);
238 static bool compareExchange(typename
Base::ValueType
& aPtr
,
239 T aOldVal
, T aNewVal
)
241 return aPtr
.compare_exchange_strong(aOldVal
, aNewVal
,
242 Base::OrderedOp::AtomicRMWOrder
,
243 Base::OrderedOp::CompareExchangeFailureOrder
);
247 template<typename T
, MemoryOrdering Order
>
248 struct IntrinsicAddSub
: public IntrinsicBase
<T
, Order
>
250 typedef IntrinsicBase
<T
, Order
> Base
;
252 static T
add(typename
Base::ValueType
& aPtr
, T aVal
)
254 return aPtr
.fetch_add(aVal
, Base::OrderedOp::AtomicRMWOrder
);
257 static T
sub(typename
Base::ValueType
& aPtr
, T aVal
)
259 return aPtr
.fetch_sub(aVal
, Base::OrderedOp::AtomicRMWOrder
);
263 template<typename T
, MemoryOrdering Order
>
264 struct IntrinsicAddSub
<T
*, Order
> : public IntrinsicBase
<T
*, Order
>
266 typedef IntrinsicBase
<T
*, Order
> Base
;
268 static T
* add(typename
Base::ValueType
& aPtr
, ptrdiff_t aVal
)
270 return aPtr
.fetch_add(fixupAddend(aVal
), Base::OrderedOp::AtomicRMWOrder
);
273 static T
* sub(typename
Base::ValueType
& aPtr
, ptrdiff_t aVal
)
275 return aPtr
.fetch_sub(fixupAddend(aVal
), Base::OrderedOp::AtomicRMWOrder
);
279 * GCC 4.6's <atomic> header has a bug where adding X to an
280 * atomic<T*> is not the same as adding X to a T*. Hence the need
281 * for this function to provide the correct addend.
283 static ptrdiff_t fixupAddend(ptrdiff_t aVal
)
285 #if defined(__clang__) || defined(_MSC_VER)
287 #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
288 !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
289 return aVal
* sizeof(T
);
296 template<typename T
, MemoryOrdering Order
>
297 struct IntrinsicIncDec
: public IntrinsicAddSub
<T
, Order
>
299 typedef IntrinsicBase
<T
, Order
> Base
;
301 static T
inc(typename
Base::ValueType
& aPtr
)
303 return IntrinsicAddSub
<T
, Order
>::add(aPtr
, 1);
306 static T
dec(typename
Base::ValueType
& aPtr
)
308 return IntrinsicAddSub
<T
, Order
>::sub(aPtr
, 1);
312 template<typename T
, MemoryOrdering Order
>
313 struct AtomicIntrinsics
: public IntrinsicMemoryOps
<T
, Order
>,
314 public IntrinsicIncDec
<T
, Order
>
316 typedef IntrinsicBase
<T
, Order
> Base
;
318 static T
or_(typename
Base::ValueType
& aPtr
, T aVal
)
320 return aPtr
.fetch_or(aVal
, Base::OrderedOp::AtomicRMWOrder
);
323 static T
xor_(typename
Base::ValueType
& aPtr
, T aVal
)
325 return aPtr
.fetch_xor(aVal
, Base::OrderedOp::AtomicRMWOrder
);
328 static T
and_(typename
Base::ValueType
& aPtr
, T aVal
)
330 return aPtr
.fetch_and(aVal
, Base::OrderedOp::AtomicRMWOrder
);
334 template<typename T
, MemoryOrdering Order
>
335 struct AtomicIntrinsics
<T
*, Order
>
336 : public IntrinsicMemoryOps
<T
*, Order
>, public IntrinsicIncDec
<T
*, Order
>
340 } // namespace detail
341 } // namespace mozilla
343 #elif defined(__GNUC__)
349 * The __sync_* family of intrinsics is documented here:
351 * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
353 * While these intrinsics are deprecated in favor of the newer __atomic_*
354 * family of intrinsics:
356 * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
358 * any GCC version that supports the __atomic_* intrinsics will also support
359 * the <atomic> header and so will be handled above. We provide a version of
360 * atomics using the __sync_* intrinsics to support older versions of GCC.
362 * All __sync_* intrinsics that we use below act as full memory barriers, for
363 * both compiler and hardware reordering, except for __sync_lock_test_and_set,
364 * which is only an acquire barrier. When we call __sync_lock_test_and_set,
365 * we add a barrier above it as appropriate.
368 template<MemoryOrdering Order
> struct Barrier
;
371 * Some processors (in particular, x86) don't require quite so many calls to
372 * __sync_synchronize as our specializations of Barrier produce. If
373 * performance turns out to be an issue, defining these specializations
374 * on a per-processor basis would be a good first tuning step.
378 struct Barrier
<Relaxed
>
380 static void beforeLoad() {}
381 static void afterLoad() {}
382 static void beforeStore() {}
383 static void afterStore() {}
387 struct Barrier
<ReleaseAcquire
>
389 static void beforeLoad() {}
390 static void afterLoad() { __sync_synchronize(); }
391 static void beforeStore() { __sync_synchronize(); }
392 static void afterStore() {}
396 struct Barrier
<SequentiallyConsistent
>
398 static void beforeLoad() { __sync_synchronize(); }
399 static void afterLoad() { __sync_synchronize(); }
400 static void beforeStore() { __sync_synchronize(); }
401 static void afterStore() { __sync_synchronize(); }
404 template<typename T
, MemoryOrdering Order
>
405 struct IntrinsicMemoryOps
407 static T
load(const T
& aPtr
)
409 Barrier
<Order
>::beforeLoad();
411 Barrier
<Order
>::afterLoad();
415 static void store(T
& aPtr
, T aVal
)
417 Barrier
<Order
>::beforeStore();
419 Barrier
<Order
>::afterStore();
422 static T
exchange(T
& aPtr
, T aVal
)
424 // __sync_lock_test_and_set is only an acquire barrier; loads and stores
425 // can't be moved up from after to before it, but they can be moved down
426 // from before to after it. We may want a stricter ordering, so we need
427 // an explicit barrier.
428 Barrier
<Order
>::beforeStore();
429 return __sync_lock_test_and_set(&aPtr
, aVal
);
432 static bool compareExchange(T
& aPtr
, T aOldVal
, T aNewVal
)
434 return __sync_bool_compare_and_swap(&aPtr
, aOldVal
, aNewVal
);
439 struct IntrinsicAddSub
443 static T
add(T
& aPtr
, T aVal
)
445 return __sync_fetch_and_add(&aPtr
, aVal
);
448 static T
sub(T
& aPtr
, T aVal
)
450 return __sync_fetch_and_sub(&aPtr
, aVal
);
455 struct IntrinsicAddSub
<T
*>
457 typedef T
* ValueType
;
460 * The reinterpret_casts are needed so that
461 * __sync_fetch_and_{add,sub} will properly type-check.
463 * Also, these functions do not provide standard semantics for
464 * pointer types, so we need to adjust the addend.
466 static ValueType
add(ValueType
& aPtr
, ptrdiff_t aVal
)
468 ValueType amount
= reinterpret_cast<ValueType
>(aVal
* sizeof(T
));
469 return __sync_fetch_and_add(&aPtr
, amount
);
472 static ValueType
sub(ValueType
& aPtr
, ptrdiff_t aVal
)
474 ValueType amount
= reinterpret_cast<ValueType
>(aVal
* sizeof(T
));
475 return __sync_fetch_and_sub(&aPtr
, amount
);
480 struct IntrinsicIncDec
: public IntrinsicAddSub
<T
>
482 static T
inc(T
& aPtr
) { return IntrinsicAddSub
<T
>::add(aPtr
, 1); }
483 static T
dec(T
& aPtr
) { return IntrinsicAddSub
<T
>::sub(aPtr
, 1); }
486 template<typename T
, MemoryOrdering Order
>
487 struct AtomicIntrinsics
: public IntrinsicMemoryOps
<T
, Order
>,
488 public IntrinsicIncDec
<T
>
490 static T
or_( T
& aPtr
, T aVal
) { return __sync_fetch_and_or(&aPtr
, aVal
); }
491 static T
xor_(T
& aPtr
, T aVal
) { return __sync_fetch_and_xor(&aPtr
, aVal
); }
492 static T
and_(T
& aPtr
, T aVal
) { return __sync_fetch_and_and(&aPtr
, aVal
); }
495 template<typename T
, MemoryOrdering Order
>
496 struct AtomicIntrinsics
<T
*, Order
> : public IntrinsicMemoryOps
<T
*, Order
>,
497 public IntrinsicIncDec
<T
*>
501 } // namespace detail
502 } // namespace mozilla
504 #elif defined(_MSC_VER)
507 * Windows comes with a full complement of atomic operations.
508 * Unfortunately, most of those aren't available for Windows XP (even if
509 * the compiler supports intrinsics for them), which is the oldest
510 * version of Windows we support. Therefore, we only provide operations
511 * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
512 * versions, we support 64-bit datatypes as well.
517 # pragma intrinsic(_InterlockedExchangeAdd)
518 # pragma intrinsic(_InterlockedOr)
519 # pragma intrinsic(_InterlockedXor)
520 # pragma intrinsic(_InterlockedAnd)
521 # pragma intrinsic(_InterlockedExchange)
522 # pragma intrinsic(_InterlockedCompareExchange)
527 # if !defined(_M_IX86) && !defined(_M_X64)
529 * The implementations below are optimized for x86ish systems. You
530 * will have to modify them if you are porting to Windows on a
531 * different architecture.
533 # error "Unknown CPU type"
537 * The PrimitiveIntrinsics template should define |Type|, the datatype of size
538 * DataSize upon which we operate, and the following eight functions.
540 * static Type add(Type* aPtr, Type aVal);
541 * static Type sub(Type* aPtr, Type aVal);
542 * static Type or_(Type* aPtr, Type aVal);
543 * static Type xor_(Type* aPtr, Type aVal);
544 * static Type and_(Type* aPtr, Type aVal);
546 * These functions perform the obvious operation on the value contained in
547 * |*aPtr| combined with |aVal| and return the value previously stored in |*aPtr|.
550 * static void store(Type* aPtr, Type aVal);
552 * This function atomically stores |aVal| into |*aPtr| and must provide a full
553 * memory fence after the store to prevent compiler and hardware instruction
554 * reordering. It should also act as a compiler barrier to prevent reads and
555 * writes from moving to after the store.
557 * static Type exchange(Type* aPtr, Type aVal);
559 * This function atomically stores |aVal| into |*aPtr| and returns the
560 * previous contents of |*aPtr|;
562 * static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal);
564 * This function atomically performs the following operation:
566 * if (*aPtr == aOldVal) {
574 template<size_t DataSize
> struct PrimitiveIntrinsics
;
577 struct PrimitiveIntrinsics
<4>
581 static Type
add(Type
* aPtr
, Type aVal
)
583 return _InterlockedExchangeAdd(aPtr
, aVal
);
586 static Type
sub(Type
* aPtr
, Type aVal
)
589 * _InterlockedExchangeSubtract isn't available before Windows 7,
590 * and we must support Windows XP.
592 return _InterlockedExchangeAdd(aPtr
, -aVal
);
595 static Type
or_(Type
* aPtr
, Type aVal
)
597 return _InterlockedOr(aPtr
, aVal
);
600 static Type
xor_(Type
* aPtr
, Type aVal
)
602 return _InterlockedXor(aPtr
, aVal
);
605 static Type
and_(Type
* aPtr
, Type aVal
)
607 return _InterlockedAnd(aPtr
, aVal
);
610 static void store(Type
* aPtr
, Type aVal
)
612 _InterlockedExchange(aPtr
, aVal
);
615 static Type
exchange(Type
* aPtr
, Type aVal
)
617 return _InterlockedExchange(aPtr
, aVal
);
620 static bool compareExchange(Type
* aPtr
, Type aOldVal
, Type aNewVal
)
622 return _InterlockedCompareExchange(aPtr
, aNewVal
, aOldVal
) == aOldVal
;
628 # pragma intrinsic(_InterlockedExchangeAdd64)
629 # pragma intrinsic(_InterlockedOr64)
630 # pragma intrinsic(_InterlockedXor64)
631 # pragma intrinsic(_InterlockedAnd64)
632 # pragma intrinsic(_InterlockedExchange64)
633 # pragma intrinsic(_InterlockedCompareExchange64)
636 struct PrimitiveIntrinsics
<8>
638 typedef __int64 Type
;
640 static Type
add(Type
* aPtr
, Type aVal
)
642 return _InterlockedExchangeAdd64(aPtr
, aVal
);
645 static Type
sub(Type
* aPtr
, Type aVal
)
648 * There is no _InterlockedExchangeSubtract64.
650 return _InterlockedExchangeAdd64(aPtr
, -aVal
);
653 static Type
or_(Type
* aPtr
, Type aVal
)
655 return _InterlockedOr64(aPtr
, aVal
);
658 static Type
xor_(Type
* aPtr
, Type aVal
)
660 return _InterlockedXor64(aPtr
, aVal
);
663 static Type
and_(Type
* aPtr
, Type aVal
)
665 return _InterlockedAnd64(aPtr
, aVal
);
668 static void store(Type
* aPtr
, Type aVal
)
670 _InterlockedExchange64(aPtr
, aVal
);
673 static Type
exchange(Type
* aPtr
, Type aVal
)
675 return _InterlockedExchange64(aPtr
, aVal
);
678 static bool compareExchange(Type
* aPtr
, Type aOldVal
, Type aNewVal
)
680 return _InterlockedCompareExchange64(aPtr
, aNewVal
, aOldVal
) == aOldVal
;
686 # pragma intrinsic(_ReadWriteBarrier)
688 template<MemoryOrdering Order
> struct Barrier
;
691 * We do not provide an afterStore method in Barrier, as Relaxed and
692 * ReleaseAcquire orderings do not require one, and the required barrier
693 * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
697 struct Barrier
<Relaxed
>
699 static void beforeLoad() {}
700 static void afterLoad() {}
701 static void beforeStore() {}
705 struct Barrier
<ReleaseAcquire
>
707 static void beforeLoad() {}
708 static void afterLoad() { _ReadWriteBarrier(); }
709 static void beforeStore() { _ReadWriteBarrier(); }
713 struct Barrier
<SequentiallyConsistent
>
715 static void beforeLoad() { _ReadWriteBarrier(); }
716 static void afterLoad() { _ReadWriteBarrier(); }
717 static void beforeStore() { _ReadWriteBarrier(); }
720 template<typename PrimType
, typename T
>
723 static PrimType
toPrimType(T aVal
) { return static_cast<PrimType
>(aVal
); }
724 static T
fromPrimType(PrimType aVal
) { return static_cast<T
>(aVal
); }
727 template<typename PrimType
, typename T
>
728 struct CastHelper
<PrimType
, T
*>
730 static PrimType
toPrimType(T
* aVal
) { return reinterpret_cast<PrimType
>(aVal
); }
731 static T
* fromPrimType(PrimType aVal
) { return reinterpret_cast<T
*>(aVal
); }
738 typedef PrimitiveIntrinsics
<sizeof(T
)> Primitives
;
739 typedef typename
Primitives::Type PrimType
;
740 static_assert(sizeof(PrimType
) == sizeof(T
),
741 "Selection of PrimitiveIntrinsics was wrong");
742 typedef CastHelper
<PrimType
, T
> Cast
;
745 template<typename T
, MemoryOrdering Order
>
746 struct IntrinsicMemoryOps
: public IntrinsicBase
<T
>
748 typedef typename IntrinsicBase
<T
>::ValueType ValueType
;
749 typedef typename IntrinsicBase
<T
>::Primitives Primitives
;
750 typedef typename IntrinsicBase
<T
>::PrimType PrimType
;
751 typedef typename IntrinsicBase
<T
>::Cast Cast
;
753 static ValueType
load(const ValueType
& aPtr
)
755 Barrier
<Order
>::beforeLoad();
756 ValueType val
= aPtr
;
757 Barrier
<Order
>::afterLoad();
761 static void store(ValueType
& aPtr
, ValueType aVal
)
763 // For SequentiallyConsistent, Primitives::store() will generate the
764 // proper memory fence. Everything else just needs a barrier before the store.
766 if (Order
== SequentiallyConsistent
) {
767 Primitives::store(reinterpret_cast<PrimType
*>(&aPtr
),
768 Cast::toPrimType(aVal
));
770 Barrier
<Order
>::beforeStore();
775 static ValueType
exchange(ValueType
& aPtr
, ValueType aVal
)
778 Primitives::exchange(reinterpret_cast<PrimType
*>(&aPtr
),
779 Cast::toPrimType(aVal
));
780 return Cast::fromPrimType(oldval
);
783 static bool compareExchange(ValueType
& aPtr
, ValueType aOldVal
,
786 return Primitives::compareExchange(reinterpret_cast<PrimType
*>(&aPtr
),
787 Cast::toPrimType(aOldVal
),
788 Cast::toPrimType(aNewVal
));
793 struct IntrinsicApplyHelper
: public IntrinsicBase
<T
>
795 typedef typename IntrinsicBase
<T
>::ValueType ValueType
;
796 typedef typename IntrinsicBase
<T
>::PrimType PrimType
;
797 typedef typename IntrinsicBase
<T
>::Cast Cast
;
798 typedef PrimType (*BinaryOp
)(PrimType
*, PrimType
);
799 typedef PrimType (*UnaryOp
)(PrimType
*);
801 static ValueType
applyBinaryFunction(BinaryOp aOp
, ValueType
& aPtr
,
804 PrimType
* primTypePtr
= reinterpret_cast<PrimType
*>(&aPtr
);
805 PrimType primTypeVal
= Cast::toPrimType(aVal
);
806 return Cast::fromPrimType(aOp(primTypePtr
, primTypeVal
));
809 static ValueType
applyUnaryFunction(UnaryOp aOp
, ValueType
& aPtr
)
811 PrimType
* primTypePtr
= reinterpret_cast<PrimType
*>(&aPtr
);
812 return Cast::fromPrimType(aOp(primTypePtr
));
817 struct IntrinsicAddSub
: public IntrinsicApplyHelper
<T
>
819 typedef typename IntrinsicApplyHelper
<T
>::ValueType ValueType
;
820 typedef typename IntrinsicBase
<T
>::Primitives Primitives
;
822 static ValueType
add(ValueType
& aPtr
, ValueType aVal
)
824 return applyBinaryFunction(&Primitives::add
, aPtr
, aVal
);
827 static ValueType
sub(ValueType
& aPtr
, ValueType aVal
)
829 return applyBinaryFunction(&Primitives::sub
, aPtr
, aVal
);
834 struct IntrinsicAddSub
<T
*> : public IntrinsicApplyHelper
<T
*>
836 typedef typename IntrinsicApplyHelper
<T
*>::ValueType ValueType
;
837 typedef typename IntrinsicBase
<T
*>::Primitives Primitives
;
839 static ValueType
add(ValueType
& aPtr
, ptrdiff_t aAmount
)
841 return applyBinaryFunction(&Primitives::add
, aPtr
,
842 (ValueType
)(aAmount
* sizeof(T
)));
845 static ValueType
sub(ValueType
& aPtr
, ptrdiff_t aAmount
)
847 return applyBinaryFunction(&Primitives::sub
, aPtr
,
848 (ValueType
)(aAmount
* sizeof(T
)));
853 struct IntrinsicIncDec
: public IntrinsicAddSub
<T
>
855 typedef typename IntrinsicAddSub
<T
>::ValueType ValueType
;
856 static ValueType
inc(ValueType
& aPtr
) { return add(aPtr
, 1); }
857 static ValueType
dec(ValueType
& aPtr
) { return sub(aPtr
, 1); }
860 template<typename T
, MemoryOrdering Order
>
861 struct AtomicIntrinsics
: public IntrinsicMemoryOps
<T
, Order
>,
862 public IntrinsicIncDec
<T
>
864 typedef typename IntrinsicIncDec
<T
>::ValueType ValueType
;
865 typedef typename IntrinsicBase
<T
>::Primitives Primitives
;
867 static ValueType
or_(ValueType
& aPtr
, T aVal
)
869 return applyBinaryFunction(&Primitives::or_
, aPtr
, aVal
);
872 static ValueType
xor_(ValueType
& aPtr
, T aVal
)
874 return applyBinaryFunction(&Primitives::xor_
, aPtr
, aVal
);
877 static ValueType
and_(ValueType
& aPtr
, T aVal
)
879 return applyBinaryFunction(&Primitives::and_
, aPtr
, aVal
);
883 template<typename T
, MemoryOrdering Order
>
884 struct AtomicIntrinsics
<T
*, Order
> : public IntrinsicMemoryOps
<T
*, Order
>,
885 public IntrinsicIncDec
<T
*>
887 typedef typename IntrinsicMemoryOps
<T
*, Order
>::ValueType ValueType
;
888 // This is required to make us be able to build with MSVC10, for unknown reasons.
890 typedef typename IntrinsicBase
<T
*>::Primitives Primitives
;
893 } // namespace detail
894 } // namespace mozilla
897 # error "Atomic compiler intrinsics are not supported on your platform"
904 template<typename T
, MemoryOrdering Order
>
907 // We only support 32-bit types on 32-bit Windows, which constrains our
908 // implementation elsewhere. But we support pointer-sized types everywhere.
909 static_assert(sizeof(T
) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T
) == 8),
910 "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
913 typedef typename
detail::AtomicIntrinsics
<T
, Order
> Intrinsics
;
914 typename
Intrinsics::ValueType mValue
;
917 MOZ_CONSTEXPR
AtomicBase() : mValue() {}
918 explicit MOZ_CONSTEXPR
AtomicBase(T aInit
) : mValue(aInit
) {}
920 // Note: we can't provide operator T() here because Atomic<bool> inherits
921 // from AtomicBase with T=uint32_t and not T=bool. If we implemented
922 // operator T() here, it would cause errors when comparing Atomic<bool> with
927 Intrinsics::store(mValue
, aVal
);
932 * Performs an atomic swap operation. aVal is stored and the previous
933 * value of this variable is returned.
937 return Intrinsics::exchange(mValue
, aVal
);
941 * Performs an atomic compare-and-swap operation and returns true if it
942 * succeeded. This is equivalent to atomically doing
944 * if (mValue == aOldValue) {
945 * mValue = aNewValue;
951 bool compareExchange(T aOldValue
, T aNewValue
)
953 return Intrinsics::compareExchange(mValue
, aOldValue
, aNewValue
);
957 template<MemoryOrdering AnyOrder
>
958 AtomicBase(const AtomicBase
<T
, AnyOrder
>& aCopy
) = delete;
961 template<typename T
, MemoryOrdering Order
>
962 class AtomicBaseIncDec
: public AtomicBase
<T
, Order
>
964 typedef typename
detail::AtomicBase
<T
, Order
> Base
;
967 MOZ_CONSTEXPR
AtomicBaseIncDec() : Base() {}
968 explicit MOZ_CONSTEXPR
AtomicBaseIncDec(T aInit
) : Base(aInit
) {}
970 using Base::operator=;
972 operator T() const { return Base::Intrinsics::load(Base::mValue
); }
973 T
operator++(int) { return Base::Intrinsics::inc(Base::mValue
); }
974 T
operator--(int) { return Base::Intrinsics::dec(Base::mValue
); }
975 T
operator++() { return Base::Intrinsics::inc(Base::mValue
) + 1; }
976 T
operator--() { return Base::Intrinsics::dec(Base::mValue
) - 1; }
979 template<MemoryOrdering AnyOrder
>
980 AtomicBaseIncDec(const AtomicBaseIncDec
<T
, AnyOrder
>& aCopy
) = delete;
983 } // namespace detail
986 * A wrapper for a type that enforces that all memory accesses are atomic.
988 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
989 * its place. Implementations for integral and pointer types are provided
992 * Atomic accesses are sequentially consistent by default. You should
993 * use the default unless you are tall enough to ride the
994 * memory-ordering roller coaster (if you're not sure, you aren't) and
995 * you have a compelling reason to do otherwise.
997 * There is one exception to the case of atomic memory accesses: providing an
998 * initial value of the atomic value is not guaranteed to be atomic. This is a
999 * deliberate design choice that enables static atomic variables to be declared
1000 * without introducing extra static constructors.
1002 template<typename T
,
1003 MemoryOrdering Order
= SequentiallyConsistent
,
1004 typename Enable
= void>
1008 * Atomic<T> implementation for integral types.
1010 * In addition to atomic store and load operations, compound assignment and
1011 * increment/decrement operators are implemented which perform the
1012 * corresponding read-modify-write operation atomically. Finally, an atomic
1013 * swap method is provided.
1015 template<typename T
, MemoryOrdering Order
>
1016 class Atomic
<T
, Order
, typename EnableIf
<IsIntegral
<T
>::value
&&
1017 !IsSame
<T
, bool>::value
>::Type
>
1018 : public detail::AtomicBaseIncDec
<T
, Order
>
1020 typedef typename
detail::AtomicBaseIncDec
<T
, Order
> Base
;
1023 MOZ_CONSTEXPR
Atomic() : Base() {}
1024 explicit MOZ_CONSTEXPR
Atomic(T aInit
) : Base(aInit
) {}
1026 using Base::operator=;
1028 T
operator+=(T aDelta
)
1030 return Base::Intrinsics::add(Base::mValue
, aDelta
) + aDelta
;
1033 T
operator-=(T aDelta
)
1035 return Base::Intrinsics::sub(Base::mValue
, aDelta
) - aDelta
;
1038 T
operator|=(T aVal
)
1040 return Base::Intrinsics::or_(Base::mValue
, aVal
) | aVal
;
1043 T
operator^=(T aVal
)
1045 return Base::Intrinsics::xor_(Base::mValue
, aVal
) ^ aVal
;
1048 T
operator&=(T aVal
)
1050 return Base::Intrinsics::and_(Base::mValue
, aVal
) & aVal
;
1054 Atomic(Atomic
<T
, Order
>& aOther
) = delete;
1058 * Atomic<T> implementation for pointer types.
1060 * An atomic compare-and-swap primitive for pointer variables is provided, as
1061 * are atomic increment and decrement operators. Also provided are the compound
1062 * assignment operators for addition and subtraction. Atomic swap (via
1063 * exchange()) is included as well.
1065 template<typename T
, MemoryOrdering Order
>
1066 class Atomic
<T
*, Order
> : public detail::AtomicBaseIncDec
<T
*, Order
>
1068 typedef typename
detail::AtomicBaseIncDec
<T
*, Order
> Base
;
1071 MOZ_CONSTEXPR
Atomic() : Base() {}
1072 explicit MOZ_CONSTEXPR
Atomic(T
* aInit
) : Base(aInit
) {}
1074 using Base::operator=;
1076 T
* operator+=(ptrdiff_t aDelta
)
1078 return Base::Intrinsics::add(Base::mValue
, aDelta
) + aDelta
;
1081 T
* operator-=(ptrdiff_t aDelta
)
1083 return Base::Intrinsics::sub(Base::mValue
, aDelta
) - aDelta
;
1087 Atomic(Atomic
<T
*, Order
>& aOther
) = delete;
1091 * Atomic<T> implementation for enum types.
1093 * The atomic store and load operations and the atomic swap method are provided.
1095 template<typename T
, MemoryOrdering Order
>
1096 class Atomic
<T
, Order
, typename EnableIf
<IsEnum
<T
>::value
>::Type
>
1097 : public detail::AtomicBase
<T
, Order
>
1099 typedef typename
detail::AtomicBase
<T
, Order
> Base
;
1102 MOZ_CONSTEXPR
Atomic() : Base() {}
1103 explicit MOZ_CONSTEXPR
Atomic(T aInit
) : Base(aInit
) {}
1105 operator T() const { return Base::Intrinsics::load(Base::mValue
); }
1107 using Base::operator=;
1110 Atomic(Atomic
<T
, Order
>& aOther
) = delete;
1114 * Atomic<T> implementation for boolean types.
1116 * The atomic store and load operations and the atomic swap method are provided.
1120 * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
1121 * bool and/or some implementations of std::atomic. This is allowed in
1122 * [atomic.types.generic]p9.
1124 * - It's not obvious whether the 8-bit atomic functions on Windows are always
1125 * inlined or not. If they are not inlined, the corresponding functions in the
1126 * runtime library are not available on Windows XP. This is why we implement
1127 * Atomic<bool> with an underlying type of uint32_t.
1129 template<MemoryOrdering Order
>
1130 class Atomic
<bool, Order
>
1131 : protected detail::AtomicBase
<uint32_t, Order
>
1133 typedef typename
detail::AtomicBase
<uint32_t, Order
> Base
;
1136 MOZ_CONSTEXPR
Atomic() : Base() {}
1137 explicit MOZ_CONSTEXPR
Atomic(bool aInit
) : Base(aInit
) {}
1139 // We provide boolean wrappers for the underlying AtomicBase methods.
1140 operator bool() const
1142 return Base::Intrinsics::load(Base::mValue
);
1145 bool operator=(bool aVal
)
1147 return Base::operator=(aVal
);
1150 bool exchange(bool aVal
)
1152 return Base::exchange(aVal
);
1155 bool compareExchange(bool aOldValue
, bool aNewValue
)
1157 return Base::compareExchange(aOldValue
, aNewValue
);
1161 Atomic(Atomic
<bool, Order
>& aOther
) = delete;
1164 } // namespace mozilla
1166 #endif /* mozilla_Atomics_h */