//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_CLOCK_H
#define TSAN_CLOCK_H

#include "tsan_defs.h"
#include "tsan_dense_alloc.h"

namespace __tsan {

typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;

// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
 public:
  SyncClock();
  ~SyncClock();

  uptr size() const;

  // These are used only in tests.
  u64 get(unsigned tid) const;
  u64 get_clean(unsigned tid) const;

  void Resize(ClockCache *c, uptr nclk);
  void Reset(ClockCache *c);

  void DebugDump(int(*printf)(const char *s, ...));

  // Clock element iterator.
  // Note: it iterates only over the table without regard to dirty entries.
  class Iter {
   public:
    explicit Iter(SyncClock* parent);
    Iter& operator++();
    bool operator!=(const Iter& other);
    ClockElem &operator*();

   private:
    SyncClock *parent_;
    // [pos_, end_) is the current continuous range of clock elements.
    ClockElem *pos_;
    ClockElem *end_;
    int block_;  // Current number of second level block.

    NOINLINE void Next();
  };

  Iter begin();
  Iter end();

 private:
  friend class ThreadClock;
  friend class Iter;
  static const uptr kDirtyTids = 2;

  struct Dirty {
    u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; }
    void set_tid(u32 tid) {
      tid_ = tid == kInvalidTid ? kShortInvalidTid : tid;
    }
    u64 epoch : kClkBits;

   private:
    // Full kInvalidTid won't fit into Dirty::tid.
    static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1;
    u64 tid_ : 64 - kClkBits;  // kInvalidTid if not active
  };

  static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit");

  unsigned release_store_tid_;
  unsigned release_store_reused_;
  Dirty dirty_[kDirtyTids];
  // If size_ is 0, tab_ is nullptr.
  // If size_ <= 64 (kClockCount), tab_ contains a pointer to an array with
  // 64 ClockElem's (ClockBlock::clock).
  // Otherwise, tab_ points to an array with up to 127 u32 elements,
  // each pointing to a second-level 512-byte block with 64 ClockElem's.
  // Unused space in the first level ClockBlock is used to store additional
  // clock elements.
  // The last u32 element in the first level ClockBlock is always used as
  // a reference counter.
  //
  // See the following scheme for details.
  // All memory blocks are 512 bytes (allocated from ClockAlloc).
  // Clock (clk) elements are 64 bits.
  // Idx and ref are 32 bits.
  //
  // tab_
  //  |
  //  \/
  //  +----------------------------------------------------+
  //  | clk128 | clk129 | ...unused... | idx1 | idx0 | ref  |
  //  +----------------------------------------------------+
  //                                      |      |
  //                                      |      \/
  //                                      |      +----------------+
  //                                      |      | clk0 ... clk63 |
  //                                      |      +----------------+
  //                                      \/
  //                                      +------------------+
  //                                      | clk64 ... clk127 |
  //                                      +------------------+
  //
  // A simplified standalone sketch of this two-level layout follows the
  // SyncClock class below.
  //
  // Note: dirty entries, if active, always override what's stored in the clock.
  ClockBlock *tab_;
  u32 tab_idx_;
  u16 size_;
  u16 blocks_;  // Number of second level blocks.

  void Unshare(ClockCache *c);
  bool IsShared() const;
  bool Cachable() const;
  void ResetImpl();
  void FlushDirty();
  uptr capacity() const;
  u32 get_block(uptr bi) const;
  void append_block(u32 idx);
  ClockElem &elem(unsigned tid) const;
};
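
// The sketch below is illustrative only and is not part of the real TSan
// sources: it models, with standalone names, the two-level layout described
// in the comment inside SyncClock above. The placement rule for elements that
// overflow into the first-level block is an assumption inferred from that
// comment and diagram; the authoritative logic lives in tsan_clock.cpp.
namespace clock_layout_example {

const uptr kBlockBytes = 512;                        // every block is 512 bytes
const uptr kClockCount = kBlockBytes / sizeof(u64);  // 64 clock elements/block
const uptr kTableSlots = kBlockBytes / sizeof(u32);  // 128 u32 slots
const uptr kRefSlot = kTableSlots - 1;               // last slot: ref counter
const uptr kMaxBlocks = kTableSlots - 1;             // up to 127 block indices

// Where a given thread id's clock element lives: either in one of the
// second-level blocks, or directly in the unused space at the front of the
// first-level block (clk128, clk129, ... in the diagram above).
struct ElemPos {
  bool in_first_level;  // stored directly in the first-level block?
  uptr block;           // second-level block number (if !in_first_level)
  uptr index;           // element index inside that block
};

inline ElemPos LocateElem(uptr tid, uptr second_level_blocks) {
  ElemPos p = {};
  const uptr block = tid / kClockCount;
  if (block < second_level_blocks) {
    p.in_first_level = false;
    p.block = block;
    p.index = tid % kClockCount;
  } else {
    // Elements past the range covered by second-level blocks are stored in
    // the first-level block itself, starting right after that range.
    p.in_first_level = true;
    p.block = 0;
    p.index = tid - second_level_blocks * kClockCount;
  }
  return p;
}

}  // namespace clock_layout_example
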
// The clock that lives in threads. A minimal standalone model of its
// acquire/release operations follows this class.
class ThreadClock {
 public:
  typedef DenseSlabAllocCache Cache;

  explicit ThreadClock(unsigned tid, unsigned reused = 0);

  u64 get(unsigned tid) const;
  void set(ClockCache *c, unsigned tid, u64 v);
  void set(u64 v);
  void tick();
  uptr size() const;

  void acquire(ClockCache *c, SyncClock *src);
  void releaseStoreAcquire(ClockCache *c, SyncClock *src);
  void release(ClockCache *c, SyncClock *dst);
  void acq_rel(ClockCache *c, SyncClock *dst);
  void ReleaseStore(ClockCache *c, SyncClock *dst);
  void ResetCached(ClockCache *c);
  void NoteGlobalAcquire(u64 v);

  void DebugReset();
  void DebugDump(int(*printf)(const char *s, ...));

 private:
  static const uptr kDirtyTids = SyncClock::kDirtyTids;
  // Index of the thread associated with the clock ("current thread").
  const unsigned tid_;
  const unsigned reused_;  // tid_ reuse count.
  // Current thread time when it acquired something from other threads.
  u64 last_acquire_;

  // Last time another thread has done a global acquire of this thread's clock.
  // It helps to avoid the problem described in:
  // https://github.com/golang/go/issues/39186
  // See test/tsan/java_finalizer2.cpp for a regression test.
  // Note the failure is _extremely_ hard to hit, so if you are trying
  // to reproduce it, you may want to run something like:
  // $ go get golang.org/x/tools/cmd/stress
  // $ stress -p=64 ./a.out
  //
  // The crux of the problem is roughly as follows.
  // A number of O(1) optimizations in the clocks algorithm assume proper
  // transitive cumulative propagation of clock values. The AcquireGlobal
  // operation may produce an inconsistent non-linearizable view of
  // thread clocks. Namely, it may acquire a later value from a thread
  // with a higher ID, but fail to acquire an earlier value from a thread
  // with a lower ID. If a thread that executed AcquireGlobal then releases
  // to a sync clock, it will spoil the sync clock with the inconsistent
  // values. If another thread later releases to the sync clock, the optimized
  // algorithm may break.
  //
  // The exact sequence of events that leads to the failure:
  // - thread 1 executes AcquireGlobal
  // - thread 1 acquires value 1 for thread 2
  // - thread 2 increments clock to 2
  // - thread 2 releases to sync object 1
  // - thread 3 at time 1
  // - thread 3 acquires from sync object 1
  // - thread 3 increments clock to 2
  // - thread 1 acquires value 2 for thread 3
  // - thread 1 releases to sync object 2
  // - sync object 2 clock has 1 for thread 2 and 2 for thread 3
  // - thread 3 releases to sync object 2
  // - thread 3 sees value 2 in the clock for itself
  //   and decides that it has already released to the clock
  //   and did not acquire anything from other threads after that
  //   (the last_acquire_ check in the release operation)
  // - thread 3 does not update the value for thread 2 in the clock from 1 to 2
  // - thread 4 acquires from sync object 2
  // - thread 4 detects a false race with thread 2
  //   as it should have been synchronized with thread 2 up to time 2,
  //   but because of the broken clock it is now synchronized only up to time 1
  //
  // The global_acquire_ value helps to prevent this scenario.
  // Namely, thread 3 will not trust any own clock values up to global_acquire_
  // for the purposes of the last_acquire_ optimization.
  atomic_uint64_t global_acquire_;

  // Cached SyncClock (without dirty entries and release_store_tid_).
  // We reuse it for subsequent store-release operations without intervening
  // acquire operations. Since it is shared (and thus constant), the clock
  // value for the current thread is then stored in dirty entries in the
  // SyncClock. We hold a reference to the table while it is cached here.
  u32 cached_idx_;
  u16 cached_size_;
  u16 cached_blocks_;

  // Number of active elements in the clk_ table (the rest is zeros).
  uptr nclk_;
  u64 clk_[kMaxTidInClock];  // Fixed size vector clock.

  bool IsAlreadyAcquired(const SyncClock *src) const;
  bool HasAcquiredAfterRelease(const SyncClock *dst) const;
  void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};
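
// A minimal standalone model (illustrative only, not the real TSan code) of
// what the ThreadClock operations declared above do in plain vector-clock
// terms; acq_rel is acquire followed by release. The real implementation in
// tsan_clock.cpp layers caching, dirty entries and O(1) fast paths on top of
// this, and all names below are assumptions made for the sketch.
namespace vector_clock_example {

const uptr kExampleThreads = 4;

struct SimpleClock {
  u64 clk[kExampleThreads];
};

// acquire: join the sync-variable clock into the thread clock.
inline void ExampleAcquire(SimpleClock *thr, const SimpleClock *sync) {
  for (uptr i = 0; i < kExampleThreads; i++)
    if (sync->clk[i] > thr->clk[i]) thr->clk[i] = sync->clk[i];
}

// release: join the thread clock into the sync-variable clock.
inline void ExampleRelease(const SimpleClock *thr, SimpleClock *sync) {
  for (uptr i = 0; i < kExampleThreads; i++)
    if (thr->clk[i] > sync->clk[i]) sync->clk[i] = thr->clk[i];
}

// ReleaseStore: overwrite the sync-variable clock with the thread clock.
inline void ExampleReleaseStore(const SimpleClock *thr, SimpleClock *sync) {
  for (uptr i = 0; i < kExampleThreads; i++) sync->clk[i] = thr->clk[i];
}

// tick: advance the calling thread's own component of its clock.
inline void ExampleTick(SimpleClock *thr, unsigned tid) { thr->clk[tid]++; }

}  // namespace vector_clock_example
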
ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
  DCHECK_LT(tid, kMaxTidInClock);
  return clk_[tid];
}

ALWAYS_INLINE void ThreadClock::set(u64 v) {
  DCHECK_GE(v, clk_[tid_]);
  clk_[tid_] = v;
}

ALWAYS_INLINE void ThreadClock::tick() {
  clk_[tid_]++;
}

ALWAYS_INLINE uptr ThreadClock::size() const {
  return nclk_;
}

ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
  // Here we rely on the fact that AcquireGlobal is protected by
  // ThreadRegistryLock, thus only one thread at a time executes it
  // and values passed to this function should not go backwards.
  CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
  atomic_store_relaxed(&global_acquire_, v);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
  return Iter(this);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
  return Iter(nullptr);
}

ALWAYS_INLINE uptr SyncClock::size() const {
  return size_;
}

ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
    : parent_(parent)
    , pos_(nullptr)
    , end_(nullptr)
    , block_(-1) {
  if (parent)
    Next();
}

ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
  pos_++;
  if (UNLIKELY(pos_ >= end_))
    Next();
  return *this;
}

ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
  return parent_ != other.parent_;
}

ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
  return *pos_;
}
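
// Illustrative usage of the SyncClock iterator defined above (not part of the
// real TSan sources). It assumes ClockElem (declared in tsan_defs.h) exposes
// an epoch field. Dirty entries are not visited, as noted in the Iter comment.
inline u64 ExampleMaxEpoch(SyncClock *sc) {
  u64 max_epoch = 0;
  // Range-for uses SyncClock::begin()/end() and Iter's operators.
  for (ClockElem &ce : *sc) {
    if (ce.epoch > max_epoch)
      max_epoch = ce.epoch;
  }
  return max_epoch;
}
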
}  // namespace __tsan

#endif  // TSAN_CLOCK_H