/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Implements (almost always) lock-free atomic operations.  The operations here
 * are a subset of those found in C++11's <atomic> header, with a different API
 * to enforce consistent memory ordering constraints.
 *
 * Anyone caught using |volatile| for inter-thread memory safety needs to be
 * sent a copy of this header and the C++11 standard.
 */

#ifndef mozilla_Atomics_h
#define mozilla_Atomics_h

#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/Compiler.h"
#include "mozilla/TypeTraits.h"

#include <stddef.h>  // for ptrdiff_t and size_t
#include <stdint.h>  // for uintptr_t
/*
 * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
 * does not have <atomic>.  So be sure to check for <atomic> support
 * along with C++0x support.
 */
#if defined(__clang__) || defined(__GNUC__)
   /*
    * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
    * loose typing of the atomic builtins.  GCC 4.5 and 4.6 lack inline
    * definitions for unspecialized std::atomic and cause linking errors.
    * Therefore, we require at least 4.7.0 for using libstdc++.
    */
#  if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
#    define MOZ_HAVE_CXX11_ATOMICS
#  elif MOZ_USING_LIBCXX
#    define MOZ_HAVE_CXX11_ATOMICS
#  endif
#elif defined(_MSC_VER) && _MSC_VER >= 1700
#  define MOZ_HAVE_CXX11_ATOMICS
#endif
namespace mozilla {

/*
 * An enum of memory ordering possibilities for atomics.
 *
 * Memory ordering is the observable state of distinct values in memory.
 * (It's a separate concept from atomicity, which concerns whether an
 * operation can ever be observed in an intermediate state.  Don't
 * conflate the two!)  Given a sequence of operations in source code on
 * memory, it is *not* always the case that, at all times and on all
 * cores, those operations will appear to have occurred in that exact
 * sequence.  First, the compiler might reorder that sequence, if it
 * thinks another ordering will be more efficient.  Second, the CPU may
 * not expose so consistent a view of memory.  CPUs will often perform
 * their own instruction reordering, above and beyond that performed by
 * the compiler.  And each core has its own memory caches, and accesses
 * (reads and writes both) to "memory" may only resolve to out-of-date
 * cache entries -- not to the "most recently" performed operation in
 * some global sense.  Any access to a value that may be used by
 * multiple threads, potentially across multiple cores, must therefore
 * have a memory ordering imposed on it, for all code on all
 * threads/cores to have a sufficiently coherent worldview.
 *
 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
 * detail on all this, including examples of how each mode works.
 *
 * Note that for simplicity and practicality, not all of the modes in
 * C++11 are supported.  The missing C++11 modes are either subsumed by
 * the modes we provide below, or not relevant for the CPUs we support
 * in Gecko.  These three modes are confusing enough as it is!
 */
enum MemoryOrdering {
  /*
   * Relaxed ordering is the simplest memory ordering: none at all.
   * When the result of a write is observed, nothing may be inferred
   * about other memory.  Writes ostensibly performed "before" on the
   * writing thread may not yet be visible.  Writes performed "after" on
   * the writing thread may already be visible, if the compiler or CPU
   * reordered them.  (The latter can happen if reads and/or writes get
   * held up in per-processor caches.)  Relaxed ordering means
   * operations can always use cached values (as long as the actual
   * updates to atomic values actually occur, correctly, eventually), so
   * it's usually the fastest sort of atomic access.  For this reason,
   * *it's also the most dangerous kind of access*.
   *
   * Relaxed ordering is good for things like process-wide statistics
   * counters that don't need to be consistent with anything else, so
   * long as updates themselves are atomic.  (And so long as any
   * observations of that value can tolerate being out-of-date -- if you
   * need some sort of up-to-date value, you need some sort of other
   * synchronizing operation.)  It's *not* good for locks, mutexes,
   * reference counts, etc. that mediate access to other memory, or must
   * be observably consistent with other memory.
   *
   * x86 architectures don't take advantage of the optimization
   * opportunities that relaxed ordering permits.  Thus it's possible
   * that using relaxed ordering will "work" on x86 but fail elsewhere
   * (ARM, say, which *does* implement non-sequentially-consistent
   * relaxed ordering semantics).  Be extra-careful using relaxed
   * ordering if you can't easily test non-x86 architectures!
   */
  Relaxed,
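  /*
   * A minimal sketch of the sort of use relaxed ordering suits.  The
   * |sPagesLoaded| counter below is hypothetical, not part of this header:
   *
   *   static mozilla::Atomic<uint32_t, mozilla::Relaxed> sPagesLoaded;
   *
   *   void NotePageLoad()
   *   {
   *     sPagesLoaded++;  // atomic increment; no ordering with other memory
   *   }
   *
   *   uint32_t PagesLoadedSoFar()
   *   {
   *     return sPagesLoaded;  // may read a slightly stale count
   *   }
   */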
  /*
   * When an atomic value is updated with ReleaseAcquire ordering, and
   * that new value is observed with ReleaseAcquire ordering, prior
   * writes (atomic or not) are also observable.  What ReleaseAcquire
   * *doesn't* give you is any observable ordering guarantees for
   * ReleaseAcquire-ordered operations on different objects.  For
   * example, if there are two cores that each perform ReleaseAcquire
   * operations on separate objects, each core may or may not observe
   * the operations made by the other core.  The only way the cores can
   * be synchronized with ReleaseAcquire is if they both
   * ReleaseAcquire-access the same object.  This implies that you can't
   * necessarily describe some global total ordering of ReleaseAcquire
   * operations.
   *
   * ReleaseAcquire ordering is good for (as the name implies) atomic
   * operations on values controlling ownership of things: reference
   * counts, mutexes, and the like.  However, if you are thinking about
   * using these to implement your own locks or mutexes, you should take
   * a good, hard look at actual lock or mutex primitives first.
   */
  ReleaseAcquire,
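  /*
   * An illustrative sketch of a release/acquire handoff.  The names
   * |sMessage| and |sMessageReady| are hypothetical:
   *
   *   static int sMessage;
   *   static mozilla::Atomic<bool, mozilla::ReleaseAcquire> sMessageReady;
   *
   *   // Writer thread:
   *   sMessage = 42;         // ordinary write
   *   sMessageReady = true;  // release store publishes the write above
   *
   *   // Reader thread:
   *   if (sMessageReady) {   // acquire load
   *     // sMessage is guaranteed to be 42 here.
   *   }
   */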
  /*
   * When an atomic value is updated with SequentiallyConsistent
   * ordering, all writes are observable when the update is observed,
   * just as with ReleaseAcquire ordering.  But, furthermore, a global
   * total ordering of SequentiallyConsistent operations *can* be
   * described.  For example, if two cores perform SequentiallyConsistent
   * operations on separate objects, one core will observably perform
   * its update (and all previous operations will have completed), then
   * the other core will observably perform its update (and all previous
   * operations will have completed).  (Although those previous
   * operations aren't themselves ordered -- they could be intermixed,
   * or ordered if they occur on atomic values with ordering
   * requirements.)  SequentiallyConsistent is the *simplest and safest*
   * ordering of atomic operations -- it's always as if one operation
   * happens, then another, then another, in some order -- and every
   * core observes updates to happen in that single order.  Because it
   * has the most synchronization requirements, operations ordered this
   * way also tend to be the slowest.
   *
   * SequentiallyConsistent ordering can be desirable when multiple
   * threads observe objects, and they all have to agree on the
   * observable order of changes to them.  People expect
   * SequentiallyConsistent ordering, even if they shouldn't, when
   * writing code, atomic or otherwise.  SequentiallyConsistent is also
   * the ordering of choice when designing lockless data structures.  If
   * you don't know what order to use, use this one.
   */
  SequentiallyConsistent
};
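/*
 * A small sketch of where the global ordering matters; the flag names are
 * hypothetical.  With SequentiallyConsistent ordering, at most one thread can
 * see both flags still false; with weaker orderings, both could.
 *
 *   static mozilla::Atomic<bool> sFlagA;  // SequentiallyConsistent by default
 *   static mozilla::Atomic<bool> sFlagB;
 *
 *   // Thread 1:             // Thread 2:
 *   sFlagA = true;           sFlagB = true;
 *   bool sawB = sFlagB;      bool sawA = sFlagA;
 *
 *   // It is impossible for both sawB and sawA to be false.
 */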
} // namespace mozilla

// Build up the underlying intrinsics.
#ifdef MOZ_HAVE_CXX11_ATOMICS

# include <atomic>

namespace mozilla {
namespace detail {

/*
 * We provide CompareExchangeFailureOrder to work around a bug in some
 * versions of GCC's <atomic> header.  See bug 898491.
 */
template<MemoryOrdering Order> struct AtomicOrderConstraints;

template<>
struct AtomicOrderConstraints<Relaxed>
{
    static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
    static const std::memory_order LoadOrder = std::memory_order_relaxed;
    static const std::memory_order StoreOrder = std::memory_order_relaxed;
    static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_relaxed;
};

template<>
struct AtomicOrderConstraints<ReleaseAcquire>
{
    static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
    static const std::memory_order LoadOrder = std::memory_order_acquire;
    static const std::memory_order StoreOrder = std::memory_order_release;
    static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_acquire;
};

template<>
struct AtomicOrderConstraints<SequentiallyConsistent>
{
    static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
    static const std::memory_order LoadOrder = std::memory_order_seq_cst;
    static const std::memory_order StoreOrder = std::memory_order_seq_cst;
    static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_seq_cst;
};
template<typename T, MemoryOrdering Order>
struct IntrinsicBase
{
    typedef std::atomic<T> ValueType;
    typedef AtomicOrderConstraints<Order> OrderedOp;
};
template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
{
    typedef IntrinsicBase<T, Order> Base;
    static T load(const typename Base::ValueType& ptr) {
      return ptr.load(Base::OrderedOp::LoadOrder);
    }
    static void store(typename Base::ValueType& ptr, T val) {
      ptr.store(val, Base::OrderedOp::StoreOrder);
    }
    static T exchange(typename Base::ValueType& ptr, T val) {
      return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder);
    }
    static bool compareExchange(typename Base::ValueType& ptr, T oldVal, T newVal) {
      return ptr.compare_exchange_strong(oldVal, newVal,
                                         Base::OrderedOp::AtomicRMWOrder,
                                         Base::OrderedOp::CompareExchangeFailureOrder);
    }
};
template<typename T, MemoryOrdering Order>
struct IntrinsicAddSub : public IntrinsicBase<T, Order>
{
    typedef IntrinsicBase<T, Order> Base;
    static T add(typename Base::ValueType& ptr, T val) {
      return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder);
    }
    static T sub(typename Base::ValueType& ptr, T val) {
      return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder);
    }
};
template<typename T, MemoryOrdering Order>
struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
{
    typedef IntrinsicBase<T*, Order> Base;
    static T* add(typename Base::ValueType& ptr, ptrdiff_t val) {
      return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
    }
    static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) {
      return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
    }

  private:
    /*
     * GCC 4.6's <atomic> header has a bug where adding X to an
     * atomic<T*> is not the same as adding X to a T*.  Hence the need
     * for this function to provide the correct addend.
     */
    static ptrdiff_t fixupAddend(ptrdiff_t val) {
#if defined(__clang__) || defined(_MSC_VER)
      return val;
#elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
      !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
      return val * sizeof(T);
#else
      return val;
#endif
    }
};
template<typename T, MemoryOrdering Order>
struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
{
    typedef IntrinsicBase<T, Order> Base;
    static T inc(typename Base::ValueType& ptr) {
      return IntrinsicAddSub<T, Order>::add(ptr, 1);
    }
    static T dec(typename Base::ValueType& ptr) {
      return IntrinsicAddSub<T, Order>::sub(ptr, 1);
    }
};
template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T, Order>
{
    typedef IntrinsicBase<T, Order> Base;
    static T or_(typename Base::ValueType& ptr, T val) {
      return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder);
    }
    static T xor_(typename Base::ValueType& ptr, T val) {
      return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder);
    }
    static T and_(typename Base::ValueType& ptr, T val) {
      return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder);
    }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order>
  : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
{
};
} // namespace detail
} // namespace mozilla

#elif defined(__GNUC__)

namespace mozilla {
namespace detail {

/*
 * The __sync_* family of intrinsics is documented here:
 *
 * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
 *
 * While these intrinsics are deprecated in favor of the newer __atomic_*
 * family of intrinsics:
 *
 * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
 *
 * any GCC version that supports the __atomic_* intrinsics will also support
 * the <atomic> header and so will be handled above.  We provide a version of
 * atomics using the __sync_* intrinsics to support older versions of GCC.
 *
 * All __sync_* intrinsics that we use below act as full memory barriers, for
 * both compiler and hardware reordering, except for __sync_lock_test_and_set,
 * which is only an acquire barrier.  When we call __sync_lock_test_and_set,
 * we add a barrier above it as appropriate.
 */
template<MemoryOrdering Order> struct Barrier;

/*
 * Some processors (in particular, x86) don't require quite so many calls to
 * __sync_synchronize as our specializations of Barrier produce.  If
 * performance turns out to be an issue, defining these specializations
 * on a per-processor basis would be a good first tuning step.
 */
template<>
struct Barrier<Relaxed>
{
    static void beforeLoad() {}
    static void afterLoad() {}
    static void beforeStore() {}
    static void afterStore() {}
};

template<>
struct Barrier<ReleaseAcquire>
{
    static void beforeLoad() {}
    static void afterLoad() { __sync_synchronize(); }
    static void beforeStore() { __sync_synchronize(); }
    static void afterStore() {}
};

template<>
struct Barrier<SequentiallyConsistent>
{
    static void beforeLoad() { __sync_synchronize(); }
    static void afterLoad() { __sync_synchronize(); }
    static void beforeStore() { __sync_synchronize(); }
    static void afterStore() { __sync_synchronize(); }
};
template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps
{
    static T load(const T& ptr) {
      Barrier<Order>::beforeLoad();
      T val = ptr;
      Barrier<Order>::afterLoad();
      return val;
    }
    static void store(T& ptr, T val) {
      Barrier<Order>::beforeStore();
      ptr = val;
      Barrier<Order>::afterStore();
    }
    static T exchange(T& ptr, T val) {
      // __sync_lock_test_and_set is only an acquire barrier; loads and stores
      // can't be moved up from after to before it, but they can be moved down
      // from before to after it.  We may want a stricter ordering, so we need
      // an explicit barrier.
      Barrier<Order>::beforeStore();
      return __sync_lock_test_and_set(&ptr, val);
    }
    static bool compareExchange(T& ptr, T oldVal, T newVal) {
      return __sync_bool_compare_and_swap(&ptr, oldVal, newVal);
    }
};
template<typename T>
struct IntrinsicAddSub
{
    typedef T ValueType;
    static T add(T& ptr, T val) {
      return __sync_fetch_and_add(&ptr, val);
    }
    static T sub(T& ptr, T val) {
      return __sync_fetch_and_sub(&ptr, val);
    }
};
template<typename T>
struct IntrinsicAddSub<T*>
{
    typedef T* ValueType;
    /*
     * The reinterpret_casts are needed so that
     * __sync_fetch_and_{add,sub} will properly type-check.
     *
     * Also, these functions do not provide standard semantics for
     * pointer types, so we need to adjust the addend.
     */
    static ValueType add(ValueType& ptr, ptrdiff_t val) {
      ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
      return __sync_fetch_and_add(&ptr, amount);
    }
    static ValueType sub(ValueType& ptr, ptrdiff_t val) {
      ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
      return __sync_fetch_and_sub(&ptr, amount);
    }
};
template<typename T>
struct IntrinsicIncDec : public IntrinsicAddSub<T>
{
    static T inc(T& ptr) { return IntrinsicAddSub<T>::add(ptr, 1); }
    static T dec(T& ptr) { return IntrinsicAddSub<T>::sub(ptr, 1); }
};
template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T>
{
    static T or_(T& ptr, T val) {
      return __sync_fetch_and_or(&ptr, val);
    }
    static T xor_(T& ptr, T val) {
      return __sync_fetch_and_xor(&ptr, val);
    }
    static T and_(T& ptr, T val) {
      return __sync_fetch_and_and(&ptr, val);
    }
};
template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*>
{
};

} // namespace detail
} // namespace mozilla
#elif defined(_MSC_VER)

/*
 * Windows comes with a full complement of atomic operations.
 * Unfortunately, most of those aren't available for Windows XP (even if
 * the compiler supports intrinsics for them), which is the oldest
 * version of Windows we support.  Therefore, we only provide operations
 * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
 * versions, we support 64-bit datatypes as well.
 *
 * To avoid namespace pollution issues, we declare whatever functions we
 * need ourselves.
 */
extern "C" {
long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value);
long __cdecl _InterlockedOr(long volatile* dst, long value);
long __cdecl _InterlockedXor(long volatile* dst, long value);
long __cdecl _InterlockedAnd(long volatile* dst, long value);
long __cdecl _InterlockedExchange(long volatile* dst, long value);
long __cdecl _InterlockedCompareExchange(long volatile* dst, long newVal,
                                         long oldVal);
}
# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedXor)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)

# if !defined(_M_IX86) && !defined(_M_X64)
   /*
    * The implementations below are optimized for x86ish systems.  You
    * will have to modify them if you are porting to Windows on a
    * different architecture.
    */
#  error "Unknown CPU type"
# endif

namespace mozilla {
namespace detail {
/*
 * The PrimitiveIntrinsics template should define |Type|, the datatype of size
 * DataSize upon which we operate, and the following eight functions.
 *
 * static Type add(Type* ptr, Type val);
 * static Type sub(Type* ptr, Type val);
 * static Type or_(Type* ptr, Type val);
 * static Type xor_(Type* ptr, Type val);
 * static Type and_(Type* ptr, Type val);
 *
 * These functions perform the obvious operation on the value contained in
 * |*ptr| combined with |val| and return the value previously stored in
 * |*ptr|.
 *
 * static void store(Type* ptr, Type val);
 *
 * This function atomically stores |val| into |*ptr| and must provide a full
 * memory fence after the store to prevent compiler and hardware instruction
 * reordering.  It should also act as a compiler barrier to prevent reads and
 * writes from moving to after the store.
 *
 * static Type exchange(Type* ptr, Type val);
 *
 * This function atomically stores |val| into |*ptr| and returns the previous
 * value of |*ptr|.
 *
 * static bool compareExchange(Type* ptr, Type oldVal, Type newVal);
 *
 * This function atomically performs the following operation:
 *
 *     if (*ptr == oldVal) {
 *       *ptr = newVal;
 *       return true;
 *     } else {
 *       return false;
 *     }
 */
template<size_t DataSize> struct PrimitiveIntrinsics;
template<>
struct PrimitiveIntrinsics<4>
{
    typedef long Type;

    static Type add(Type* ptr, Type val) {
      return _InterlockedExchangeAdd(ptr, val);
    }
    static Type sub(Type* ptr, Type val) {
      /*
       * _InterlockedExchangeSubtract isn't available before Windows 7,
       * and we must support Windows XP.
       */
      return _InterlockedExchangeAdd(ptr, -val);
    }
    static Type or_(Type* ptr, Type val) {
      return _InterlockedOr(ptr, val);
    }
    static Type xor_(Type* ptr, Type val) {
      return _InterlockedXor(ptr, val);
    }
    static Type and_(Type* ptr, Type val) {
      return _InterlockedAnd(ptr, val);
    }
    static void store(Type* ptr, Type val) {
      _InterlockedExchange(ptr, val);
    }
    static Type exchange(Type* ptr, Type val) {
      return _InterlockedExchange(ptr, val);
    }
    static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
      return _InterlockedCompareExchange(ptr, newVal, oldVal) == oldVal;
    }
};
# if defined(_M_X64)

extern "C" {
long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst,
                                            long long value);
long long __cdecl _InterlockedOr64(long long volatile* dst,
                                   long long value);
long long __cdecl _InterlockedXor64(long long volatile* dst,
                                    long long value);
long long __cdecl _InterlockedAnd64(long long volatile* dst,
                                    long long value);
long long __cdecl _InterlockedExchange64(long long volatile* dst,
                                         long long value);
long long __cdecl _InterlockedCompareExchange64(long long volatile* dst,
                                                long long newVal,
                                                long long oldVal);
}

# pragma intrinsic(_InterlockedExchangeAdd64)
# pragma intrinsic(_InterlockedOr64)
# pragma intrinsic(_InterlockedXor64)
# pragma intrinsic(_InterlockedAnd64)
# pragma intrinsic(_InterlockedExchange64)
# pragma intrinsic(_InterlockedCompareExchange64)
template<>
struct PrimitiveIntrinsics<8>
{
    typedef __int64 Type;

    static Type add(Type* ptr, Type val) {
      return _InterlockedExchangeAdd64(ptr, val);
    }
    static Type sub(Type* ptr, Type val) {
      /*
       * There is no _InterlockedExchangeSubtract64.
       */
      return _InterlockedExchangeAdd64(ptr, -val);
    }
    static Type or_(Type* ptr, Type val) {
      return _InterlockedOr64(ptr, val);
    }
    static Type xor_(Type* ptr, Type val) {
      return _InterlockedXor64(ptr, val);
    }
    static Type and_(Type* ptr, Type val) {
      return _InterlockedAnd64(ptr, val);
    }
    static void store(Type* ptr, Type val) {
      _InterlockedExchange64(ptr, val);
    }
    static Type exchange(Type* ptr, Type val) {
      return _InterlockedExchange64(ptr, val);
    }
    static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
      return _InterlockedCompareExchange64(ptr, newVal, oldVal) == oldVal;
    }
};

# endif

extern "C" { void _ReadWriteBarrier(); }

# pragma intrinsic(_ReadWriteBarrier)
template<MemoryOrdering Order> struct Barrier;

/*
 * We do not provide an afterStore method in Barrier, as Relaxed and
 * ReleaseAcquire orderings do not require one, and the required barrier
 * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
 */

template<>
struct Barrier<Relaxed>
{
    static void beforeLoad() {}
    static void afterLoad() {}
    static void beforeStore() {}
};

template<>
struct Barrier<ReleaseAcquire>
{
    static void beforeLoad() {}
    static void afterLoad() { _ReadWriteBarrier(); }
    static void beforeStore() { _ReadWriteBarrier(); }
};

template<>
struct Barrier<SequentiallyConsistent>
{
    static void beforeLoad() { _ReadWriteBarrier(); }
    static void afterLoad() { _ReadWriteBarrier(); }
    static void beforeStore() { _ReadWriteBarrier(); }
};
template<typename PrimType, typename T>
struct CastHelper
{
    static PrimType toPrimType(T val) { return static_cast<PrimType>(val); }
    static T fromPrimType(PrimType val) { return static_cast<T>(val); }
};

template<typename PrimType, typename T>
struct CastHelper<PrimType, T*>
{
    static PrimType toPrimType(T* val) { return reinterpret_cast<PrimType>(val); }
    static T* fromPrimType(PrimType val) { return reinterpret_cast<T*>(val); }
};
template<typename T>
struct IntrinsicBase
{
    typedef T ValueType;
    typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
    typedef typename Primitives::Type PrimType;
    static_assert(sizeof(PrimType) == sizeof(T),
                  "Selection of PrimitiveIntrinsics was wrong");
    typedef CastHelper<PrimType, T> Cast;
};
template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps : public IntrinsicBase<T>
{
    typedef typename IntrinsicBase<T>::ValueType ValueType;
    typedef typename IntrinsicBase<T>::Primitives Primitives;
    typedef typename IntrinsicBase<T>::PrimType PrimType;
    typedef typename IntrinsicBase<T>::Cast Cast;
    static ValueType load(const ValueType& ptr) {
      Barrier<Order>::beforeLoad();
      ValueType val = ptr;
      Barrier<Order>::afterLoad();
      return val;
    }
    static void store(ValueType& ptr, ValueType val) {
      // For SequentiallyConsistent, Primitives::store() will generate the
      // proper memory fence.  Everything else just needs a barrier before
      // the store.
      if (Order == SequentiallyConsistent) {
        Primitives::store(reinterpret_cast<PrimType*>(&ptr),
                          Cast::toPrimType(val));
      } else {
        Barrier<Order>::beforeStore();
        ptr = val;
      }
    }
    static ValueType exchange(ValueType& ptr, ValueType val) {
      PrimType oldval =
        Primitives::exchange(reinterpret_cast<PrimType*>(&ptr),
                             Cast::toPrimType(val));
      return Cast::fromPrimType(oldval);
    }
    static bool compareExchange(ValueType& ptr, ValueType oldVal, ValueType newVal) {
      return Primitives::compareExchange(reinterpret_cast<PrimType*>(&ptr),
                                         Cast::toPrimType(oldVal),
                                         Cast::toPrimType(newVal));
    }
};
template<typename T>
struct IntrinsicApplyHelper : public IntrinsicBase<T>
{
    typedef typename IntrinsicBase<T>::ValueType ValueType;
    typedef typename IntrinsicBase<T>::PrimType PrimType;
    typedef typename IntrinsicBase<T>::Cast Cast;
    typedef PrimType (*BinaryOp)(PrimType*, PrimType);
    typedef PrimType (*UnaryOp)(PrimType*);

    static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr,
                                         ValueType val) {
      PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
      PrimType primTypeVal = Cast::toPrimType(val);
      return Cast::fromPrimType(op(primTypePtr, primTypeVal));
    }

    static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) {
      PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
      return Cast::fromPrimType(op(primTypePtr));
    }
};
template<typename T>
struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
{
    typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
    typedef typename IntrinsicBase<T>::Primitives Primitives;
    static ValueType add(ValueType& ptr, ValueType val) {
      return applyBinaryFunction(&Primitives::add, ptr, val);
    }
    static ValueType sub(ValueType& ptr, ValueType val) {
      return applyBinaryFunction(&Primitives::sub, ptr, val);
    }
};
template<typename T>
struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
{
    typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
    static ValueType add(ValueType& ptr, ptrdiff_t amount) {
      return applyBinaryFunction(&Primitives::add, ptr,
                                 (ValueType)(amount * sizeof(ValueType)));
    }
    static ValueType sub(ValueType& ptr, ptrdiff_t amount) {
      return applyBinaryFunction(&Primitives::sub, ptr,
                                 (ValueType)(amount * sizeof(ValueType)));
    }
};
template<typename T>
struct IntrinsicIncDec : public IntrinsicAddSub<T>
{
    typedef typename IntrinsicAddSub<T>::ValueType ValueType;
    static ValueType inc(ValueType& ptr) { return add(ptr, 1); }
    static ValueType dec(ValueType& ptr) { return sub(ptr, 1); }
};
template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T>
{
    typedef typename IntrinsicIncDec<T>::ValueType ValueType;
    static ValueType or_(ValueType& ptr, T val) {
      return applyBinaryFunction(&Primitives::or_, ptr, val);
    }
    static ValueType xor_(ValueType& ptr, T val) {
      return applyBinaryFunction(&Primitives::xor_, ptr, val);
    }
    static ValueType and_(ValueType& ptr, T val) {
      return applyBinaryFunction(&Primitives::and_, ptr, val);
    }
};
template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*>
{
    typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
};

} // namespace detail
} // namespace mozilla

#else
# error "Atomic compiler intrinsics are not supported on your platform"
#endif

namespace mozilla {
namespace detail {
template<typename T, MemoryOrdering Order>
class AtomicBase
{
    // We only support 32-bit types on 32-bit Windows, which constrains our
    // implementation elsewhere.  But we support pointer-sized types everywhere.
    static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
                  "mozilla/Atomics.h only supports 32-bit and pointer-sized types");

  protected:
    typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
    typename Intrinsics::ValueType mValue;

  public:
    MOZ_CONSTEXPR AtomicBase() : mValue() {}
    MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}

    operator T() const { return Intrinsics::load(mValue); }

    T operator=(T aValue) {
      Intrinsics::store(mValue, aValue);
      return aValue;
    }

    /**
     * Performs an atomic swap operation.  aValue is stored and the previous
     * value of this variable is returned.
     */
    T exchange(T aValue) {
      return Intrinsics::exchange(mValue, aValue);
    }

    /**
     * Performs an atomic compare-and-swap operation and returns true if it
     * succeeded.  This is equivalent to atomically doing
     *
     *   if (mValue == aOldValue) {
     *     mValue = aNewValue;
     *     return true;
     *   } else {
     *     return false;
     *   }
     */
    bool compareExchange(T aOldValue, T aNewValue) {
      return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
    }

  private:
    template<MemoryOrdering AnyOrder>
    AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
};
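/*
 * An illustrative sketch of compareExchange() in a retry loop, e.g. to
 * atomically compute a maximum (the |sMax| variable is hypothetical):
 *
 *   static mozilla::Atomic<uint32_t> sMax;
 *
 *   void NoteValue(uint32_t aValue)
 *   {
 *     uint32_t old = sMax;
 *     while (old < aValue && !sMax.compareExchange(old, aValue)) {
 *       old = sMax;  // another thread won the race; re-read and retry
 *     }
 *   }
 */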
template<typename T, MemoryOrdering Order>
class AtomicBaseIncDec : public AtomicBase<T, Order>
{
    typedef typename detail::AtomicBase<T, Order> Base;

  public:
    MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
    MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}

    using Base::operator=;

    T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
    T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
    T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
    T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }

  private:
    template<MemoryOrdering AnyOrder>
    AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
};

} // namespace detail
/**
 * A wrapper for a type that enforces that all memory accesses are atomic.
 *
 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 * its place.  Implementations for integral and pointer types are provided
 * below.
 *
 * Atomic accesses are sequentially consistent by default.  You should
 * use the default unless you are tall enough to ride the
 * memory-ordering roller coaster (if you're not sure, you aren't) and
 * you have a compelling reason to do otherwise.
 *
 * There is one exception to the case of atomic memory accesses: providing an
 * initial value of the atomic value is not guaranteed to be atomic.  This is a
 * deliberate design choice that enables static atomic variables to be declared
 * without introducing extra static constructors.
 */
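/*
 * A brief usage sketch; the names below are hypothetical, not part of this
 * header:
 *
 *   static mozilla::Atomic<uint32_t> sRequestCount;  // SequentiallyConsistent
 *
 *   void OnRequest()
 *   {
 *     sRequestCount++;              // atomic read-modify-write
 *   }
 *
 *   bool TooManyRequests()
 *   {
 *     return sRequestCount > 1000;  // atomic load via operator T()
 *   }
 */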
template<typename T,
         MemoryOrdering Order = SequentiallyConsistent,
         typename Enable = void>
class Atomic;
/**
 * Atomic<T> implementation for integral types.
 *
 * In addition to atomic store and load operations, compound assignment and
 * increment/decrement operators are implemented which perform the
 * corresponding read-modify-write operation atomically.  Finally, an atomic
 * swap method is provided.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value>::Type>
  : public detail::AtomicBaseIncDec<T, Order>
{
    typedef typename detail::AtomicBaseIncDec<T, Order> Base;

  public:
    MOZ_CONSTEXPR Atomic() : Base() {}
    MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}

    using Base::operator=;

    T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; }
    T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; }
    T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; }
    T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; }
    T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; }

  private:
    Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
};
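/*
 * A short sketch of the integral specialization in use; the |sFlags| bitfield
 * is hypothetical:
 *
 *   static mozilla::Atomic<uint32_t> sFlags;
 *
 *   void SetBit(uint32_t aBit)   { sFlags |= aBit; }   // atomic fetch-or
 *   void ClearBit(uint32_t aBit) { sFlags &= ~aBit; }  // atomic fetch-and
 *   bool BitIsSet(uint32_t aBit) { return (sFlags & aBit) != 0; }
 */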
/**
 * Atomic<T> implementation for pointer types.
 *
 * An atomic compare-and-swap primitive for pointer variables is provided, as
 * are atomic increment and decrement operators.  Also provided are the compound
 * assignment operators for addition and subtraction.  Atomic swap (via
 * exchange()) is included as well.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
{
    typedef typename detail::AtomicBaseIncDec<T*, Order> Base;

  public:
    MOZ_CONSTEXPR Atomic() : Base() {}
    MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}

    using Base::operator=;

    T* operator+=(ptrdiff_t delta) {
      return Base::Intrinsics::add(Base::mValue, delta) + delta;
    }
    T* operator-=(ptrdiff_t delta) {
      return Base::Intrinsics::sub(Base::mValue, delta) - delta;
    }

  private:
    Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
};
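/*
 * A small sketch of the pointer specialization, e.g. publishing a lazily
 * created object; the |gCache| pointer and |Cache| type are hypothetical:
 *
 *   static mozilla::Atomic<Cache*> gCache;
 *
 *   Cache* GetCache()
 *   {
 *     Cache* cache = gCache;
 *     if (!cache) {
 *       cache = new Cache();
 *       if (!gCache.compareExchange(nullptr, cache)) {
 *         delete cache;    // another thread published its Cache first
 *         cache = gCache;
 *       }
 *     }
 *     return cache;
 *   }
 */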
/**
 * Atomic<T> implementation for enum types.
 *
 * The atomic store and load operations and the atomic swap method are provided.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
  : public detail::AtomicBase<T, Order>
{
    typedef typename detail::AtomicBase<T, Order> Base;

  public:
    MOZ_CONSTEXPR Atomic() : Base() {}
    MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}

    using Base::operator=;

  private:
    Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
};
} // namespace mozilla

#endif /* mozilla_Atomics_h */