//===-- tsan_clock.cc -----------------------------------------------------===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads,
// respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads. SyncClock contains a growable vector clock for the currently
// necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
// void ThreadClock::acquire(const SyncClock *src) {
//   for (int i = 0; i < kMaxThreads; i++)
//     clock[i] = max(clock[i], src->clock[i]);
// }
//
// void ThreadClock::release(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = max(dst->clock[i], clock[i]);
// }
//
// void ThreadClock::ReleaseStore(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = clock[i];
// }
//
// void ThreadClock::acq_rel(SyncClock *dst) {
//   acquire(dst);
//   release(dst);
// }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// allows implementing important classes of use cases in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
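//
// As an illustration of use case 1 (an informal sketch, not a quote of real
// runtime calls): after a single release-store, each reader pays O(N) once
// to set its "acquired" mark, and every repeated acquire then only rescans
// the dirty_tids_ slots:
//
//   t0.ReleaseStore(&sync);  // O(N), records the store by t0
//   t1.acquire(&sync);       // first acquire by t1: O(N), sets its mark
//   t1.acquire(&sync);       // repeated acquire: O(1) fast path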
//
// Thread-safety note: all const operations on SyncClocks are conducted under
// a shared lock; all non-const operations on SyncClocks are conducted under
// an exclusive lock; ThreadClocks are private to their respective threads
// and so do not need any protection.
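//
// A hypothetical caller would look roughly like this (SyncVar, mtx and the
// RAII lock types here are illustrative assumptions, not the actual runtime
// API):
//
//   { ReadLock l(&s->mtx); thr->clock.acquire(&s->clock); }  // const op
//   { Lock l(&s->mtx);     thr->clock.release(&s->clock); }  // non-const op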
//
// Description of ThreadClock state:
// clk_ - fixed size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
//   other threads.
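//
// A rough sketch of that state (the authoritative declaration is in
// tsan_clock.h; the exact field types here are assumptions):
//
//   ClockElem clk_[kMaxTidInClock];  // fixed-size vector clock
//   uptr nclk_;                      // effective size
//   unsigned tid_, reused_;          // owner thread and its reuse count
//   u64 last_acquire_;               // time of last acquire from others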
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold the "acquired" flag (the actual value is the
//   thread's reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other
//   threads need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of a
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
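//
// Conceptually each clk_ element packs both values into a single u64
// (a sketch based on the description above; the real definition lives in
// tsan_clock.h):
//
//   struct ClockElem {
//     u64 epoch  : kClkBits;       // low bits: timestamp
//     u64 reused : 64 - kClkBits;  // remaining bits: "acquired" mark
//   };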

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#ifndef TSAN_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

const unsigned kInvalidTid = (unsigned)-1;

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

void ThreadClock::acquire(const SyncClock *src) {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(src->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->clk_.Size();
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation.
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->clk_[tid_].reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->clk_[tid].epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->clk_[i].epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (nclk > tid_)
    src->clk_[tid_].reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}

void ThreadClock::release(SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->clk_.Size(), kMaxTid);

  if (dst->clk_.Size() == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockReleaseResize);
    dst->clk_.Resize(nclk_);
  }

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->clk_[tid_].
  if (dst->clk_[tid_].epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
    dst->clk_[i].reused = 0;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->clk_.Size())
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
    dst->clk_[i].reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->clk_[tid_].reused = reused_;
}

void ThreadClock::ReleaseStore(SyncClock *dst) const {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(dst->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockStoreResize);
    dst->clk_.Resize(nclk_);
  }

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->clk_[tid_].epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = clk_[i].epoch;
    dst->clk_[i].reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->clk_.Size()) {
    internal_memset(&dst->clk_[nclk_], 0,
        (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->clk_[tid_].reused = reused_;
}

void ThreadClock::acq_rel(SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(dst);
  ReleaseStore(dst);
}

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  dst->clk_[tid_].epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->clk_.Size(); i++) {
    dst->clk_[i].reused = 0;
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->clk_[tid_].reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->clk_[tid].epoch)
        return false;
    }
  }
  return true;
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_].epoch;
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : clk_(MBlockClock) {
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

void SyncClock::Reset() {
  clk_.Reset();
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < clk_.Size(); i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < clk_.Size(); i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}

}  // namespace __tsan