1 //===-- tsan_clock.cc -----------------------------------------------------===//
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
6 //===----------------------------------------------------------------------===//
8 // This file is a part of ThreadSanitizer (TSan), a race detector.
10 //===----------------------------------------------------------------------===//
11 #include "tsan_clock.h"
13 #include "sanitizer_common/sanitizer_placement_new.h"
15 // SyncClock and ThreadClock implement vector clocks for sync variables
16 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads. SyncClock contains a growable vector clock for the currently
// necessary number of threads.
// Together they implement a very simple model of operations, namely:
22 // void ThreadClock::acquire(const SyncClock *src) {
23 // for (int i = 0; i < kMaxThreads; i++)
24 // clock[i] = max(clock[i], src->clock[i]);
27 // void ThreadClock::release(SyncClock *dst) const {
28 // for (int i = 0; i < kMaxThreads; i++)
29 // dst->clock[i] = max(dst->clock[i], clock[i]);
32 // void ThreadClock::ReleaseStore(SyncClock *dst) const {
33 // for (int i = 0; i < kMaxThreads; i++)
34 // dst->clock[i] = clock[i];
37 // void ThreadClock::acq_rel(SyncClock *dst) {
42 // Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// makes it possible to implement important classes of use cases in O(1)
// instead of O(N).
47 // 1. Singleton/once atomic that has a single release-store operation followed
48 // by zillions of acquire-loads (the acquire-load is O(1)).
49 // 2. Thread-local mutex (both lock and unlock can be O(1)).
50 // 3. Leaf mutex (unlock is O(1)).
51 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
52 // 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is in
54 // read-only phase, these reads will be O(1); if it later switches to read/write
55 // phase, the implementation will correctly handle that by switching to O(N).
57 // Thread-safety note: all const operations on SyncClock's are conducted under
58 // a shared lock; all non-const operations on SyncClock's are conducted under
59 // an exclusive lock; ThreadClock's are private to respective threads and so
60 // do not need any protection.
62 // Description of ThreadClock state:
63 // clk_ - fixed size vector clock.
64 // nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
//   other threads.
69 // Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is the thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
77 // release_store_tid_ - denotes that the clock state is a result of
78 // release-store operation by the thread with release_store_tid_ index.
79 // release_store_reused_ - reuse count of release_store_tid_.
81 // We don't have ThreadState in these methods, so this is an ugly hack that
84 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
86 # define CPP_STAT_INC(typ) (void)0
// Sentinel thread index: the all-ones unsigned value. It is stored in
// dirty_tids_ slots and in release_store_tid_ to mean "no thread".
const unsigned kInvalidTid = static_cast<unsigned>(-1);
93 ThreadClock::ThreadClock(unsigned tid
, unsigned reused
)
95 , reused_(reused
+ 1) { // 0 has special meaning
96 CHECK_LT(tid
, kMaxTidInClock
);
97 CHECK_EQ(reused_
, ((u64
)reused_
<< kClkBits
) >> kClkBits
);
100 internal_memset(clk_
, 0, sizeof(clk_
));
101 clk_
[tid_
].reused
= reused_
;
104 void ThreadClock::acquire(ClockCache
*c
, const SyncClock
*src
) {
105 DCHECK(nclk_
<= kMaxTid
);
106 DCHECK(src
->size_
<= kMaxTid
);
107 CPP_STAT_INC(StatClockAcquire
);
109 // Check if it's empty -> no need to do anything.
110 const uptr nclk
= src
->size_
;
112 CPP_STAT_INC(StatClockAcquireEmpty
);
116 // Check if we've already acquired src after the last release operation on src
117 bool acquired
= false;
119 CPP_STAT_INC(StatClockAcquireLarge
);
120 if (src
->elem(tid_
).reused
== reused_
) {
121 CPP_STAT_INC(StatClockAcquireRepeat
);
122 for (unsigned i
= 0; i
< kDirtyTids
; i
++) {
123 unsigned tid
= src
->dirty_tids_
[i
];
124 if (tid
!= kInvalidTid
) {
125 u64 epoch
= src
->elem(tid
).epoch
;
126 if (clk_
[tid
].epoch
< epoch
) {
127 clk_
[tid
].epoch
= epoch
;
133 CPP_STAT_INC(StatClockAcquiredSomething
);
134 last_acquire_
= clk_
[tid_
].epoch
;
141 CPP_STAT_INC(StatClockAcquireFull
);
142 nclk_
= max(nclk_
, nclk
);
143 for (uptr i
= 0; i
< nclk
; i
++) {
144 u64 epoch
= src
->elem(i
).epoch
;
145 if (clk_
[i
].epoch
< epoch
) {
146 clk_
[i
].epoch
= epoch
;
151 // Remember that this thread has acquired this clock.
153 src
->elem(tid_
).reused
= reused_
;
156 CPP_STAT_INC(StatClockAcquiredSomething
);
157 last_acquire_
= clk_
[tid_
].epoch
;
161 void ThreadClock::release(ClockCache
*c
, SyncClock
*dst
) const {
162 DCHECK_LE(nclk_
, kMaxTid
);
163 DCHECK_LE(dst
->size_
, kMaxTid
);
165 if (dst
->size_
== 0) {
166 // ReleaseStore will correctly set release_store_tid_,
167 // which can be important for future operations.
168 ReleaseStore(c
, dst
);
172 CPP_STAT_INC(StatClockRelease
);
173 // Check if we need to resize dst.
174 if (dst
->size_
< nclk_
)
175 dst
->Resize(c
, nclk_
);
177 // Check if we had not acquired anything from other threads
178 // since the last release on dst. If so, we need to update
179 // only dst->elem(tid_).
180 if (dst
->elem(tid_
).epoch
> last_acquire_
) {
181 UpdateCurrentThread(dst
);
182 if (dst
->release_store_tid_
!= tid_
||
183 dst
->release_store_reused_
!= reused_
)
184 dst
->release_store_tid_
= kInvalidTid
;
189 CPP_STAT_INC(StatClockReleaseFull
);
190 // First, remember whether we've acquired dst.
191 bool acquired
= IsAlreadyAcquired(dst
);
193 CPP_STAT_INC(StatClockReleaseAcquired
);
195 for (uptr i
= 0; i
< nclk_
; i
++) {
196 ClockElem
&ce
= dst
->elem(i
);
197 ce
.epoch
= max(ce
.epoch
, clk_
[i
].epoch
);
200 // Clear 'acquired' flag in the remaining elements.
201 if (nclk_
< dst
->size_
)
202 CPP_STAT_INC(StatClockReleaseClearTail
);
203 for (uptr i
= nclk_
; i
< dst
->size_
; i
++)
204 dst
->elem(i
).reused
= 0;
205 for (unsigned i
= 0; i
< kDirtyTids
; i
++)
206 dst
->dirty_tids_
[i
] = kInvalidTid
;
207 dst
->release_store_tid_
= kInvalidTid
;
208 dst
->release_store_reused_
= 0;
209 // If we've acquired dst, remember this fact,
210 // so that we don't need to acquire it on next acquire.
212 dst
->elem(tid_
).reused
= reused_
;
215 void ThreadClock::ReleaseStore(ClockCache
*c
, SyncClock
*dst
) const {
216 DCHECK(nclk_
<= kMaxTid
);
217 DCHECK(dst
->size_
<= kMaxTid
);
218 CPP_STAT_INC(StatClockStore
);
220 // Check if we need to resize dst.
221 if (dst
->size_
< nclk_
)
222 dst
->Resize(c
, nclk_
);
224 if (dst
->release_store_tid_
== tid_
&&
225 dst
->release_store_reused_
== reused_
&&
226 dst
->elem(tid_
).epoch
> last_acquire_
) {
227 CPP_STAT_INC(StatClockStoreFast
);
228 UpdateCurrentThread(dst
);
232 // O(N) release-store.
233 CPP_STAT_INC(StatClockStoreFull
);
234 for (uptr i
= 0; i
< nclk_
; i
++) {
235 ClockElem
&ce
= dst
->elem(i
);
236 ce
.epoch
= clk_
[i
].epoch
;
239 // Clear the tail of dst->clk_.
240 if (nclk_
< dst
->size_
) {
241 for (uptr i
= nclk_
; i
< dst
->size_
; i
++) {
242 ClockElem
&ce
= dst
->elem(i
);
246 CPP_STAT_INC(StatClockStoreTail
);
248 for (unsigned i
= 0; i
< kDirtyTids
; i
++)
249 dst
->dirty_tids_
[i
] = kInvalidTid
;
250 dst
->release_store_tid_
= tid_
;
251 dst
->release_store_reused_
= reused_
;
  // Remember that we don't need to acquire it in the future.
253 dst
->elem(tid_
).reused
= reused_
;
256 void ThreadClock::acq_rel(ClockCache
*c
, SyncClock
*dst
) {
257 CPP_STAT_INC(StatClockAcquireRelease
);
259 ReleaseStore(c
, dst
);
262 // Updates only single element related to the current thread in dst->clk_.
263 void ThreadClock::UpdateCurrentThread(SyncClock
*dst
) const {
264 // Update the threads time, but preserve 'acquired' flag.
265 dst
->elem(tid_
).epoch
= clk_
[tid_
].epoch
;
267 for (unsigned i
= 0; i
< kDirtyTids
; i
++) {
268 if (dst
->dirty_tids_
[i
] == tid_
) {
269 CPP_STAT_INC(StatClockReleaseFast1
);
272 if (dst
->dirty_tids_
[i
] == kInvalidTid
) {
273 CPP_STAT_INC(StatClockReleaseFast2
);
274 dst
->dirty_tids_
[i
] = tid_
;
278 // Reset all 'acquired' flags, O(N).
279 CPP_STAT_INC(StatClockReleaseSlow
);
280 for (uptr i
= 0; i
< dst
->size_
; i
++)
281 dst
->elem(i
).reused
= 0;
282 for (unsigned i
= 0; i
< kDirtyTids
; i
++)
283 dst
->dirty_tids_
[i
] = kInvalidTid
;
// Checks whether the current thread has already acquired src.
287 bool ThreadClock::IsAlreadyAcquired(const SyncClock
*src
) const {
288 if (src
->elem(tid_
).reused
!= reused_
)
290 for (unsigned i
= 0; i
< kDirtyTids
; i
++) {
291 unsigned tid
= src
->dirty_tids_
[i
];
292 if (tid
!= kInvalidTid
) {
293 if (clk_
[tid
].epoch
< src
->elem(tid
).epoch
)
300 void SyncClock::Resize(ClockCache
*c
, uptr nclk
) {
301 CPP_STAT_INC(StatClockReleaseResize
);
302 if (RoundUpTo(nclk
, ClockBlock::kClockCount
) <=
303 RoundUpTo(size_
, ClockBlock::kClockCount
)) {
304 // Growing within the same block.
305 // Memory is already allocated, just increase the size.
309 if (nclk
<= ClockBlock::kClockCount
) {
310 // Grow from 0 to one-level table.
313 CHECK_EQ(tab_idx_
, 0);
315 tab_idx_
= ctx
->clock_alloc
.Alloc(c
);
316 tab_
= ctx
->clock_alloc
.Map(tab_idx_
);
317 internal_memset(tab_
, 0, sizeof(*tab_
));
320 // Growing two-level table.
322 // Allocate first level table.
323 tab_idx_
= ctx
->clock_alloc
.Alloc(c
);
324 tab_
= ctx
->clock_alloc
.Map(tab_idx_
);
325 internal_memset(tab_
, 0, sizeof(*tab_
));
326 } else if (size_
<= ClockBlock::kClockCount
) {
327 // Transform one-level table to two-level table.
329 tab_idx_
= ctx
->clock_alloc
.Alloc(c
);
330 tab_
= ctx
->clock_alloc
.Map(tab_idx_
);
331 internal_memset(tab_
, 0, sizeof(*tab_
));
332 tab_
->table
[0] = old
;
334 // At this point we have first level table allocated.
335 // Add second level tables as necessary.
336 for (uptr i
= RoundUpTo(size_
, ClockBlock::kClockCount
);
337 i
< nclk
; i
+= ClockBlock::kClockCount
) {
338 u32 idx
= ctx
->clock_alloc
.Alloc(c
);
339 ClockBlock
*cb
= ctx
->clock_alloc
.Map(idx
);
340 internal_memset(cb
, 0, sizeof(*cb
));
341 CHECK_EQ(tab_
->table
[i
/ClockBlock::kClockCount
], 0);
342 tab_
->table
[i
/ClockBlock::kClockCount
] = idx
;
347 // Sets a single element in the vector clock.
348 // This function is called only from weird places like AcquireGlobal.
349 void ThreadClock::set(unsigned tid
, u64 v
) {
350 DCHECK_LT(tid
, kMaxTid
);
351 DCHECK_GE(v
, clk_
[tid
].epoch
);
355 last_acquire_
= clk_
[tid_
].epoch
;
358 void ThreadClock::DebugDump(int(*printf
)(const char *s
, ...)) {
360 for (uptr i
= 0; i
< nclk_
; i
++)
361 printf("%s%llu", i
== 0 ? "" : ",", clk_
[i
].epoch
);
362 printf("] reused=[");
363 for (uptr i
= 0; i
< nclk_
; i
++)
364 printf("%s%llu", i
== 0 ? "" : ",", clk_
[i
].reused
);
365 printf("] tid=%u/%u last_acq=%llu",
366 tid_
, reused_
, last_acquire_
);
369 SyncClock::SyncClock()
370 : release_store_tid_(kInvalidTid
)
371 , release_store_reused_()
375 for (uptr i
= 0; i
< kDirtyTids
; i
++)
376 dirty_tids_
[i
] = kInvalidTid
;
379 SyncClock::~SyncClock() {
380 // Reset must be called before dtor.
383 CHECK_EQ(tab_idx_
, 0);
386 void SyncClock::Reset(ClockCache
*c
) {
389 } else if (size_
<= ClockBlock::kClockCount
) {
391 ctx
->clock_alloc
.Free(c
, tab_idx_
);
394 for (uptr i
= 0; i
< size_
; i
+= ClockBlock::kClockCount
)
395 ctx
->clock_alloc
.Free(c
, tab_
->table
[i
/ ClockBlock::kClockCount
]);
396 ctx
->clock_alloc
.Free(c
, tab_idx_
);
401 release_store_tid_
= kInvalidTid
;
402 release_store_reused_
= 0;
403 for (uptr i
= 0; i
< kDirtyTids
; i
++)
404 dirty_tids_
[i
] = kInvalidTid
;
407 ClockElem
&SyncClock::elem(unsigned tid
) const {
408 DCHECK_LT(tid
, size_
);
409 if (size_
<= ClockBlock::kClockCount
)
410 return tab_
->clock
[tid
];
411 u32 idx
= tab_
->table
[tid
/ ClockBlock::kClockCount
];
412 ClockBlock
*cb
= ctx
->clock_alloc
.Map(idx
);
413 return cb
->clock
[tid
% ClockBlock::kClockCount
];
416 void SyncClock::DebugDump(int(*printf
)(const char *s
, ...)) {
418 for (uptr i
= 0; i
< size_
; i
++)
419 printf("%s%llu", i
== 0 ? "" : ",", elem(i
).epoch
);
420 printf("] reused=[");
421 for (uptr i
= 0; i
< size_
; i
++)
422 printf("%s%llu", i
== 0 ? "" : ",", elem(i
).reused
);
423 printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
424 release_store_tid_
, release_store_reused_
,
425 dirty_tids_
[0], dirty_tids_
[1]);
427 } // namespace __tsan