//===-- tsan_clock.cc -----------------------------------------------------===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads,
// respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads. SyncClock contains a growable vector clock for the currently
// necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
// void ThreadClock::acquire(const SyncClock *src) {
//   for (int i = 0; i < kMaxThreads; i++)
//     clock[i] = max(clock[i], src->clock[i]);
// }
//
// void ThreadClock::release(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = max(dst->clock[i], clock[i]);
// }
//
// void ThreadClock::ReleaseStore(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = clock[i];
// }
//
// void ThreadClock::acq_rel(SyncClock *dst) {
//   acquire(dst);
//   release(dst);
// }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// allows implementing important classes of use cases in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
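//
// As an illustration of use case 1 (an informal sketch, not a quote of real
// runtime calls): after a single release-store, each reader pays O(N) once
// to set its "acquired" mark, and every repeated acquire then only rescans
// the dirty_tids_ slots:
//
//   t0.ReleaseStore(&sync);  // O(N), records the store by t0
//   t1.acquire(&sync);       // first acquire by t1: O(N), sets its mark
//   t1.acquire(&sync);       // repeated acquire: O(1) fast path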
//
// Thread-safety note: all const operations on SyncClocks are conducted under
// a shared lock; all non-const operations on SyncClocks are conducted under
// an exclusive lock; ThreadClocks are private to their respective threads
// and so do not need any protection.
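//
// A hypothetical caller would look roughly like this (SyncVar, mtx and the
// RAII lock types here are illustrative assumptions, not the actual runtime
// API):
//
//   { ReadLock l(&s->mtx); thr->clock.acquire(&s->clock); }  // const op
//   { Lock l(&s->mtx);     thr->clock.release(&s->clock); }  // non-const op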
//
// Description of ThreadClock state:
// clk_ - fixed size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
//   other threads.
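//
// A rough sketch of that state (the authoritative declaration is in
// tsan_clock.h; the exact field types here are assumptions):
//
//   ClockElem clk_[kMaxTidInClock];  // fixed-size vector clock
//   uptr nclk_;                      // effective size
//   unsigned tid_, reused_;          // owner thread and its reuse count
//   u64 last_acquire_;               // time of last acquire from others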
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold the "acquired" flag (the actual value is the
//   thread's reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other
//   threads need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of a
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
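//
// Conceptually each clk_ element packs both values into a single u64
// (a sketch based on the description above; the real definition lives in
// tsan_clock.h):
//
//   struct ClockElem {
//     u64 epoch  : kClkBits;       // low bits: timestamp
//     u64 reused : 64 - kClkBits;  // remaining bits: "acquired" mark
//   };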

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#ifndef TSAN_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

const unsigned kInvalidTid = (unsigned)-1;

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

void ThreadClock::acquire(const SyncClock *src) {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(src->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->clk_.Size();
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation.
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->clk_[tid_].reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->clk_[tid].epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->clk_[i].epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (nclk > tid_)
    src->clk_[tid_].reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}

void ThreadClock::release(SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->clk_.Size(), kMaxTid);

  if (dst->clk_.Size() == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockReleaseResize);
    dst->clk_.Resize(nclk_);
  }

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->clk_[tid_].
  if (dst->clk_[tid_].epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
    dst->clk_[i].reused = 0;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->clk_.Size())
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
    dst->clk_[i].reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->clk_[tid_].reused = reused_;
}

void ThreadClock::ReleaseStore(SyncClock *dst) const {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(dst->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockStoreResize);
    dst->clk_.Resize(nclk_);
  }

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->clk_[tid_].epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = clk_[i].epoch;
    dst->clk_[i].reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->clk_.Size()) {
    internal_memset(&dst->clk_[nclk_], 0,
        (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->clk_[tid_].reused = reused_;
}

void ThreadClock::acq_rel(SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(dst);
  ReleaseStore(dst);
}

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  dst->clk_[tid_].epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->clk_.Size(); i++) {
    dst->clk_[i].reused = 0;
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->clk_[tid_].reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->clk_[tid].epoch)
        return false;
    }
  }
  return true;
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_].epoch;
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : clk_(MBlockClock) {
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

void SyncClock::Reset() {
  clk_.Reset();
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < clk_.Size(); i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < clk_.Size(); i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}

}  // namespace __tsan