Small ChangeLog tweak.
[official-gcc.git] / libsanitizer / tsan / tsan_clock.cc
blob23f9228a672095f33ab48bf2d027b1e00cc37bd9
1 //===-- tsan_clock.cc -----------------------------------------------------===//
2 //
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // This file is a part of ThreadSanitizer (TSan), a race detector.
9 //
10 //===----------------------------------------------------------------------===//
11 #include "tsan_clock.h"
12 #include "tsan_rtl.h"
13 #include "sanitizer_common/sanitizer_placement_new.h"
15 // SyncClock and ThreadClock implement vector clocks for sync variables
16 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
17 // ThreadClock contains fixed-size vector clock for maximum number of threads.
18 // SyncClock contains growable vector clock for currently necessary number of
19 // threads.
20 // Together they implement very simple model of operations, namely:
22 // void ThreadClock::acquire(const SyncClock *src) {
23 // for (int i = 0; i < kMaxThreads; i++)
24 // clock[i] = max(clock[i], src->clock[i]);
25 // }
27 // void ThreadClock::release(SyncClock *dst) const {
28 // for (int i = 0; i < kMaxThreads; i++)
29 // dst->clock[i] = max(dst->clock[i], clock[i]);
30 // }
32 // void ThreadClock::ReleaseStore(SyncClock *dst) const {
33 // for (int i = 0; i < kMaxThreads; i++)
34 // dst->clock[i] = clock[i];
35 // }
37 // void ThreadClock::acq_rel(SyncClock *dst) {
38 // acquire(dst);
39 // release(dst);
40 // }
42 // Conformance to this model is extensively verified in tsan_clock_test.cc.
43 // However, the implementation is significantly more complex. The complexity
44 // allows important classes of use cases to be implemented in O(1) instead of O(N).
46 // The use cases are:
47 // 1. Singleton/once atomic that has a single release-store operation followed
48 // by zillions of acquire-loads (the acquire-load is O(1)).
49 // 2. Thread-local mutex (both lock and unlock can be O(1)).
50 // 3. Leaf mutex (unlock is O(1)).
51 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
52 // 5. An atomic with a single writer (writes can be O(1)).
53 // The implementation dynamically adapts to workload. So if an atomic is in
54 // read-only phase, these reads will be O(1); if it later switches to read/write
55 // phase, the implementation will correctly handle that by switching to O(N).
57 // Thread-safety note: all const operations on SyncClock's are conducted under
58 // a shared lock; all non-const operations on SyncClock's are conducted under
59 // an exclusive lock; ThreadClock's are private to respective threads and so
60 // do not need any protection.
62 // Description of ThreadClock state:
63 // clk_ - fixed size vector clock.
64 // nclk_ - effective size of the vector clock (the rest is zeros).
65 // tid_ - index of the thread associated with the clock ("current thread").
66 // last_acquire_ - current thread time when it acquired something from
67 // other threads.
69 // Description of SyncClock state:
70 // clk_ - variable size vector clock, low kClkBits hold timestamp,
71 // the remaining bits hold "acquired" flag (the actual value is thread's
72 // reused counter);
73 // if acquired == thr->reused_, then the respective thread has already
74 // acquired this clock (except possibly dirty_tids_).
75 // dirty_tids_ - holds up to two indices in the vector clock that other threads
76 // need to acquire regardless of "acquired" flag value;
77 // release_store_tid_ - denotes that the clock state is a result of
78 // release-store operation by the thread with release_store_tid_ index.
79 // release_store_reused_ - reuse count of release_store_tid_.
81 // We don't have ThreadState in these methods, so this is an ugly hack that
82 // works only in C++.
// CPP_STAT_INC(typ) expands to StatInc(cur_thread(), typ) when SANITIZER_GO is
// zero; otherwise it expands to the no-op expression (void)0.
83 #if !SANITIZER_GO
84 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
85 #else
86 # define CPP_STAT_INC(typ) (void)0
87 #endif
89 namespace __tsan {
// Constructs the vector clock of thread `tid`.
//   tid    - index of the owning thread; must be below kMaxTidInClock.
//   reused - reuse count supplied by the caller; it is stored as reused + 1.
// After construction every clock element is zero, the effective size nclk_
// covers indices [0, tid], last_acquire_ is 0, and the thread's own element is
// tagged with the stored reuse counter.
91 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
92 : tid_(tid)
93 , reused_(reused + 1) { // 0 has special meaning
94 CHECK_LT(tid, kMaxTidInClock);
// The stored counter must be unchanged by shifting it (as a u64) left and then
// right by kClkBits, i.e. its top kClkBits bits must be zero.
95 CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
96 nclk_ = tid_ + 1;
97 last_acquire_ = 0;
98 internal_memset(clk_, 0, sizeof(clk_));
99 clk_[tid_].reused = reused_;
// Merges the sync clock `src` into this thread clock: every element of this
// clock that is behind the corresponding element of `src` is raised to it.
//   c   - clock cache; not referenced anywhere in this function's body.
//   src - sync clock to acquire from. It is const, but elem() hands back a
//         mutable reference, and the slow path below uses that to record in
//         `src` that this thread has acquired it.
// Three outcomes are possible:
//   1. `src` is empty: nothing to do.
//   2. `src` already carries this thread's reuse counter in the thread's own
//      element: only the entries named by dirty_tids_ are examined.
//   3. Otherwise every element of `src` is examined (O(N)).
// Whenever any element was actually raised, last_acquire_ is set to the
// thread's own current epoch.
102 void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
103 DCHECK_LE(nclk_, kMaxTid);
104 DCHECK_LE(src->size_, kMaxTid);
105 CPP_STAT_INC(StatClockAcquire);
107 // Check if it's empty -> no need to do anything.
108 const uptr nclk = src->size_;
109 if (nclk == 0) {
110 CPP_STAT_INC(StatClockAcquireEmpty);
111 return;
114 // Check if we've already acquired src after the last release operation on src
115 bool acquired = false;
// The thread's own element exists in src only when src is larger than tid_.
116 if (nclk > tid_) {
117 CPP_STAT_INC(StatClockAcquireLarge);
118 if (src->elem(tid_).reused == reused_) {
119 CPP_STAT_INC(StatClockAcquireRepeat);
// Fast path: only the dirty entries can be newer than what we already hold.
120 for (unsigned i = 0; i < kDirtyTids; i++) {
121 unsigned tid = src->dirty_tids_[i];
122 if (tid != kInvalidTid) {
123 u64 epoch = src->elem(tid).epoch;
124 if (clk_[tid].epoch < epoch) {
125 clk_[tid].epoch = epoch;
126 acquired = true;
130 if (acquired) {
131 CPP_STAT_INC(StatClockAcquiredSomething);
132 last_acquire_ = clk_[tid_].epoch;
134 return;
138 // O(N) acquire.
139 CPP_STAT_INC(StatClockAcquireFull);
// Grow the effective size so that it covers everything src covers.
140 nclk_ = max(nclk_, nclk);
141 for (uptr i = 0; i < nclk; i++) {
142 u64 epoch = src->elem(i).epoch;
143 if (clk_[i].epoch < epoch) {
144 clk_[i].epoch = epoch;
145 acquired = true;
149 // Remember that this thread has acquired this clock.
// NOTE(review): this writes through a const SyncClock; the file header says
// const operations run under a shared lock - confirm that each thread writing
// only its own element here is the intended design.
150 if (nclk > tid_)
151 src->elem(tid_).reused = reused_;
153 if (acquired) {
154 CPP_STAT_INC(StatClockAcquiredSomething);
155 last_acquire_ = clk_[tid_].epoch;
// Merges this thread clock into the sync clock `dst` (element-wise maximum).
//   c   - clock cache, forwarded to ReleaseStore() and SyncClock::Resize().
//   dst - sync clock to release into; grown to nclk_ elements if it is smaller.
// Paths, in the order they are tried:
//   1. `dst` is empty: delegate to ReleaseStore().
//   2. dst's element for this thread is newer than last_acquire_: only this
//      thread's element is refreshed, via UpdateCurrentThread().
//   3. Otherwise all nclk_ elements are merged (O(N)), every 'acquired' flag
//      and dirty tid is cleared, and the release-store marker is invalidated.
159 void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
160 DCHECK_LE(nclk_, kMaxTid);
161 DCHECK_LE(dst->size_, kMaxTid);
163 if (dst->size_ == 0) {
164 // ReleaseStore will correctly set release_store_tid_,
165 // which can be important for future operations.
166 ReleaseStore(c, dst);
167 return;
170 CPP_STAT_INC(StatClockRelease);
171 // Check if we need to resize dst.
172 if (dst->size_ < nclk_)
173 dst->Resize(c, nclk_);
175 // Check if we had not acquired anything from other threads
176 // since the last release on dst. If so, we need to update
177 // only dst->elem(tid_).
178 if (dst->elem(tid_).epoch > last_acquire_) {
179 UpdateCurrentThread(dst);
// The clock stays marked as a release-store result only if that release-store
// was made by this very thread with the same reuse counter.
180 if (dst->release_store_tid_ != tid_ ||
181 dst->release_store_reused_ != reused_)
182 dst->release_store_tid_ = kInvalidTid;
183 return;
186 // O(N) release.
187 CPP_STAT_INC(StatClockReleaseFull);
188 // First, remember whether we've acquired dst.
// This has to be sampled before the loops below wipe the flags it reads.
189 bool acquired = IsAlreadyAcquired(dst);
190 if (acquired)
191 CPP_STAT_INC(StatClockReleaseAcquired);
192 // Update dst->clk_.
193 for (uptr i = 0; i < nclk_; i++) {
194 ClockElem &ce = dst->elem(i);
195 ce.epoch = max(ce.epoch, clk_[i].epoch);
196 ce.reused = 0;
198 // Clear 'acquired' flag in the remaining elements.
199 if (nclk_ < dst->size_)
200 CPP_STAT_INC(StatClockReleaseClearTail);
201 for (uptr i = nclk_; i < dst->size_; i++)
202 dst->elem(i).reused = 0;
203 for (unsigned i = 0; i < kDirtyTids; i++)
204 dst->dirty_tids_[i] = kInvalidTid;
205 dst->release_store_tid_ = kInvalidTid;
206 dst->release_store_reused_ = 0;
207 // If we've acquired dst, remember this fact,
208 // so that we don't need to acquire it on next acquire.
209 if (acquired)
210 dst->elem(tid_).reused = reused_;
// Overwrites the sync clock `dst` with this thread clock (no merging).
//   c   - clock cache, forwarded to SyncClock::Resize().
//   dst - sync clock to store into; grown to nclk_ elements if it is smaller.
// Fast path: if `dst` is already marked as a release-store by this same thread
// and reuse counter, and dst's element for this thread is newer than
// last_acquire_, only this thread's element is refreshed.
// Slow path (O(N)): the first nclk_ elements are copied, any remaining tail is
// zeroed, dirty tids are cleared, and `dst` is marked both as a release-store
// result of this thread and as already acquired by it.
213 void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
214 DCHECK_LE(nclk_, kMaxTid);
215 DCHECK_LE(dst->size_, kMaxTid);
216 CPP_STAT_INC(StatClockStore);
218 // Check if we need to resize dst.
219 if (dst->size_ < nclk_)
220 dst->Resize(c, nclk_);
222 if (dst->release_store_tid_ == tid_ &&
223 dst->release_store_reused_ == reused_ &&
224 dst->elem(tid_).epoch > last_acquire_) {
225 CPP_STAT_INC(StatClockStoreFast);
226 UpdateCurrentThread(dst);
227 return;
230 // O(N) release-store.
231 CPP_STAT_INC(StatClockStoreFull);
232 for (uptr i = 0; i < nclk_; i++) {
233 ClockElem &ce = dst->elem(i);
234 ce.epoch = clk_[i].epoch;
235 ce.reused = 0;
237 // Clear the tail of dst->clk_.
238 if (nclk_ < dst->size_) {
239 for (uptr i = nclk_; i < dst->size_; i++) {
240 ClockElem &ce = dst->elem(i);
241 ce.epoch = 0;
242 ce.reused = 0;
244 CPP_STAT_INC(StatClockStoreTail);
246 for (unsigned i = 0; i < kDirtyTids; i++)
247 dst->dirty_tids_[i] = kInvalidTid;
248 dst->release_store_tid_ = tid_;
249 dst->release_store_reused_ = reused_;
250 // Remember that we don't need to acquire it in future.
251 dst->elem(tid_).reused = reused_;
// Acquire-release on `dst`: first acquires from it, then stores into it.
//   c   - clock cache, forwarded to both steps.
//   dst - sync clock that is read and then overwritten.
// NOTE(review): the second step is ReleaseStore(), not release(); presumably
// this is valid because after acquire() this clock already dominates `dst`,
// so a store equals a merge - confirm against tsan_clock_test.cc.
254 void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
255 CPP_STAT_INC(StatClockAcquireRelease);
256 acquire(c, dst);
257 ReleaseStore(c, dst);
260 // Updates only single element related to the current thread in dst->clk_.
// After copying the thread's epoch, tid_ is recorded in dst->dirty_tids_:
//   - if tid_ is already listed, nothing more is needed;
//   - else the first free (kInvalidTid) slot is taken;
//   - else, with no slot available, every 'acquired' flag in `dst` is cleared
//     and all dirty slots are reset, which is O(N) in dst->size_.
// The slots are scanned in order, and the scan stops at the first slot that
// either matches tid_ or is free.
261 void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
262 // Update the threads time, but preserve 'acquired' flag.
263 dst->elem(tid_).epoch = clk_[tid_].epoch;
265 for (unsigned i = 0; i < kDirtyTids; i++) {
266 if (dst->dirty_tids_[i] == tid_) {
267 CPP_STAT_INC(StatClockReleaseFast1);
268 return;
270 if (dst->dirty_tids_[i] == kInvalidTid) {
271 CPP_STAT_INC(StatClockReleaseFast2);
272 dst->dirty_tids_[i] = tid_;
273 return;
276 // Reset all 'acquired' flags, O(N).
277 CPP_STAT_INC(StatClockReleaseSlow);
278 for (uptr i = 0; i < dst->size_; i++)
279 dst->elem(i).reused = 0;
280 for (unsigned i = 0; i < kDirtyTids; i++)
281 dst->dirty_tids_[i] = kInvalidTid;
284 // Checks whether the current thread has already acquired src.
// Returns true only if both conditions hold:
//   - src's element for this thread carries this thread's reuse counter, and
//   - for every valid entry in src->dirty_tids_, this clock's epoch for that
//     tid is not behind src's epoch for it.
// The function reads src->elem(tid_) unconditionally, so the caller must make
// sure `src` has an element for tid_.
285 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
286 if (src->elem(tid_).reused != reused_)
287 return false;
288 for (unsigned i = 0; i < kDirtyTids; i++) {
289 unsigned tid = src->dirty_tids_[i];
290 if (tid != kInvalidTid) {
291 if (clk_[tid].epoch < src->elem(tid).epoch)
292 return false;
295 return true;
// Changes the number of elements of this sync clock to `nclk`, allocating
// storage blocks from ctx->clock_alloc as needed.
//   c    - clock cache handed to every ctx->clock_alloc.Alloc() call.
//   nclk - requested element count.
// Storage comes in blocks of ClockBlock::kClockCount elements. Up to one
// block, tab_ is the block itself (one-level table). Beyond that, tab_ is a
// first-level table whose table[] entries are the allocator indices of the
// element blocks (two-level table).
// NOTE(review): the first branch also assigns size_ = nclk when nclk is not
// larger than size_; the callers visible in this file only call Resize when
// size_ < nclk - confirm shrinking is never requested.
298 void SyncClock::Resize(ClockCache *c, uptr nclk) {
299 CPP_STAT_INC(StatClockReleaseResize);
300 if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
301 RoundUpTo(size_, ClockBlock::kClockCount)) {
302 // Growing within the same block.
303 // Memory is already allocated, just increase the size.
304 size_ = nclk;
305 return;
307 if (nclk <= ClockBlock::kClockCount) {
308 // Grow from 0 to one-level table.
309 CHECK_EQ(size_, 0);
310 CHECK_EQ(tab_, 0);
311 CHECK_EQ(tab_idx_, 0);
312 size_ = nclk;
313 tab_idx_ = ctx->clock_alloc.Alloc(c);
314 tab_ = ctx->clock_alloc.Map(tab_idx_);
315 internal_memset(tab_, 0, sizeof(*tab_));
316 return;
318 // Growing two-level table.
319 if (size_ == 0) {
320 // Allocate first level table.
321 tab_idx_ = ctx->clock_alloc.Alloc(c);
322 tab_ = ctx->clock_alloc.Map(tab_idx_);
323 internal_memset(tab_, 0, sizeof(*tab_));
324 } else if (size_ <= ClockBlock::kClockCount) {
325 // Transform one-level table to two-level table.
// The existing element block is kept and becomes entry 0 of the new table.
326 u32 old = tab_idx_;
327 tab_idx_ = ctx->clock_alloc.Alloc(c);
328 tab_ = ctx->clock_alloc.Map(tab_idx_);
329 internal_memset(tab_, 0, sizeof(*tab_));
330 tab_->table[0] = old;
332 // At this point we have first level table allocated.
333 // Add second level tables as necessary.
// Start at the first block boundary not yet backed by storage and add one
// zeroed block per kClockCount elements until nclk is covered.
334 for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
335 i < nclk; i += ClockBlock::kClockCount) {
336 u32 idx = ctx->clock_alloc.Alloc(c);
337 ClockBlock *cb = ctx->clock_alloc.Map(idx);
338 internal_memset(cb, 0, sizeof(*cb));
339 CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
340 tab_->table[i/ClockBlock::kClockCount] = idx;
342 size_ = nclk;
345 // Sets a single element in the vector clock.
346 // This function is called only from weird places like AcquireGlobal.
//   tid - index of the element to set; must be below kMaxTid.
//   v   - new epoch; the debug check requires it not to be smaller than the
//         epoch currently stored for `tid`.
// The effective size nclk_ is extended to include `tid` when necessary, and
// last_acquire_ is set to the owning thread's own current epoch.
347 void ThreadClock::set(unsigned tid, u64 v) {
348 DCHECK_LT(tid, kMaxTid);
349 DCHECK_GE(v, clk_[tid].epoch);
350 clk_[tid].epoch = v;
351 if (nclk_ <= tid)
352 nclk_ = tid + 1;
353 last_acquire_ = clk_[tid_].epoch;
// Writes a one-line textual dump of this thread clock through the supplied
// printf-style callback.
//   printf - output function; this parameter shadows any global printf, so
//            every call in the body goes through the callback.
// The output lists the epochs and then the reuse counters of the first nclk_
// elements, each comma-separated, followed by tid_/reused_ and last_acquire_.
356 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
357 printf("clock=[");
358 for (uptr i = 0; i < nclk_; i++)
359 printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
360 printf("] reused=[");
361 for (uptr i = 0; i < nclk_; i++)
362 printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
363 printf("] tid=%u/%u last_acq=%llu",
364 tid_, reused_, last_acquire_);
// Constructs an empty sync clock: release_store_tid_ is kInvalidTid, the
// remaining listed members are value-initialized, and every dirty_tids_ slot
// is set to kInvalidTid.
367 SyncClock::SyncClock()
368 : release_store_tid_(kInvalidTid)
369 , release_store_reused_()
370 , tab_()
371 , tab_idx_()
372 , size_() {
373 for (uptr i = 0; i < kDirtyTids; i++)
374 dirty_tids_[i] = kInvalidTid;
// The destructor releases nothing itself; it only checks that size_, tab_ and
// tab_idx_ are all zero, which is the state Reset() leaves behind.
377 SyncClock::~SyncClock() {
378 // Reset must be called before dtor.
379 CHECK_EQ(size_, 0);
380 CHECK_EQ(tab_, 0);
381 CHECK_EQ(tab_idx_, 0);
// Returns all storage of this sync clock to ctx->clock_alloc and puts the
// object back into the empty state.
//   c - clock cache handed to every ctx->clock_alloc.Free() call.
// What is freed depends on the current size: nothing when empty; the single
// block when size_ fits in one block; otherwise every element block listed in
// the first-level table, followed by the first-level table itself.
384 void SyncClock::Reset(ClockCache *c) {
385 if (size_ == 0) {
386 // nothing
387 } else if (size_ <= ClockBlock::kClockCount) {
388 // One-level table.
389 ctx->clock_alloc.Free(c, tab_idx_);
390 } else {
391 // Two-level table.
// One table entry is visited per kClockCount elements covered by size_.
392 for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
393 ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
394 ctx->clock_alloc.Free(c, tab_idx_);
396 tab_ = 0;
397 tab_idx_ = 0;
398 size_ = 0;
399 release_store_tid_ = kInvalidTid;
400 release_store_reused_ = 0;
401 for (uptr i = 0; i < kDirtyTids; i++)
402 dirty_tids_[i] = kInvalidTid;
// Returns a reference to the clock element for thread index `tid`.
//   tid - element index; the debug check requires tid < size_.
// The member function is const yet returns a non-const reference, so callers
// holding a const SyncClock can still modify the element.
// With at most one block of elements, tab_ holds the elements directly;
// otherwise tab_->table[] is indexed by block number to obtain the allocator
// index of the block that holds the element.
405 ClockElem &SyncClock::elem(unsigned tid) const {
406 DCHECK_LT(tid, size_);
407 if (size_ <= ClockBlock::kClockCount)
408 return tab_->clock[tid];
409 u32 idx = tab_->table[tid / ClockBlock::kClockCount];
410 ClockBlock *cb = ctx->clock_alloc.Map(idx);
411 return cb->clock[tid % ClockBlock::kClockCount];
// Writes a one-line textual dump of this sync clock through the supplied
// printf-style callback.
//   printf - output function; this parameter shadows any global printf, so
//            every call in the body goes through the callback.
// The output lists the epochs and then the reuse counters of all size_
// elements, each comma-separated, followed by the release-store tid/reuse pair
// and the first two dirty_tids_ slots.
414 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
415 printf("clock=[");
416 for (uptr i = 0; i < size_; i++)
417 printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
418 printf("] reused=[");
419 for (uptr i = 0; i < size_; i++)
420 printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
// Only slots 0 and 1 are printed, whatever the value of kDirtyTids.
421 printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
422 release_store_tid_, release_store_reused_,
423 dirty_tids_[0], dirty_tids_[1]);
425 } // namespace __tsan