libsanitizer/tsan/tsan_clock.cc

   1 //===-- tsan_clock.cc -----------------------------------------------------===//
   2 //
   3 // This file is distributed under the University of Illinois Open Source
   4 // License. See LICENSE.TXT for details.
   5 //
   6 //===----------------------------------------------------------------------===//
   7 //
   8 // This file is a part of ThreadSanitizer (TSan), a race detector.
   9 //
  10 //===----------------------------------------------------------------------===//
  11 #include "tsan_clock.h"
  12 #include "tsan_rtl.h"
  13 #include "sanitizer_common/sanitizer_placement_new.h"
  14
  15 // SyncClock and ThreadClock implement vector clocks for sync variables
  16 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
  17 // ThreadClock contains fixed-size vector clock for maximum number of threads.
  18 // SyncClock contains growable vector clock for currently necessary number of
  19 // threads.
  20 // Together they implement very simple model of operations, namely:
  21 //
  22 //   void ThreadClock::acquire(const SyncClock *src) {
  23 //     for (int i = 0; i < kMaxThreads; i++)
  24 //       clock[i] = max(clock[i], src->clock[i]);
  25 //   }
  26 //
  27 //   void ThreadClock::release(SyncClock *dst) const {
  28 //     for (int i = 0; i < kMaxThreads; i++)
  29 //       dst->clock[i] = max(dst->clock[i], clock[i]);
  30 //   }
  31 //
  32 //   void ThreadClock::ReleaseStore(SyncClock *dst) const {
  33 //     for (int i = 0; i < kMaxThreads; i++)
  34 //       dst->clock[i] = clock[i];
  35 //   }
  36 //
  37 //   void ThreadClock::acq_rel(SyncClock *dst) {
  38 //     acquire(dst);
  39 //     release(dst);
  40 //   }
  41 //
  42 // Conformance to this model is extensively verified in tsan_clock_test.cc.
  43 // However, the implementation is significantly more complex. The complexity
  44 // allows to implement important classes of use cases in O(1) instead of O(N).
  45 //
  46 // The use cases are:
  47 // 1. Singleton/once atomic that has a single release-store operation followed
  48 //    by zillions of acquire-loads (the acquire-load is O(1)).
  49 // 2. Thread-local mutex (both lock and unlock can be O(1)).
  50 // 3. Leaf mutex (unlock is O(1)).
  51 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
  52 // 5. An atomic with a single writer (writes can be O(1)).
  53 // The implementation dynamically adopts to workload. So if an atomic is in
  54 // read-only phase, these reads will be O(1); if it later switches to read/write
  55 // phase, the implementation will correctly handle that by switching to O(N).
  56 //
  57 // Thread-safety note: all const operations on SyncClock's are conducted under
  58 // a shared lock; all non-const operations on SyncClock's are conducted under
  59 // an exclusive lock; ThreadClock's are private to respective threads and so
  60 // do not need any protection.
  61 //
  62 // Description of ThreadClock state:
  63 // clk_ - fixed size vector clock.
  64 // nclk_ - effective size of the vector clock (the rest is zeros).
  65 // tid_ - index of the thread associated with he clock ("current thread").
  66 // last_acquire_ - current thread time when it acquired something from
  67 //   other threads.
  68 //
  69 // Description of SyncClock state:
  70 // clk_ - variable size vector clock, low kClkBits hold timestamp,
  71 //   the remaining bits hold "acquired" flag (the actual value is thread's
  72 //   reused counter);
  73 //   if acquried == thr->reused_, then the respective thread has already
  74 //   acquired this clock (except possibly dirty_tids_).
  75 // dirty_tids_ - holds up to two indeces in the vector clock that other threads
  76 //   need to acquire regardless of "acquired" flag value;
  77 // release_store_tid_ - denotes that the clock state is a result of
  78 //   release-store operation by the thread with release_store_tid_ index.
  79 // release_store_reused_ - reuse count of release_store_tid_.
  80
  81 // We don't have ThreadState in these methods, so this is an ugly hack that
  82 // works only in C++.
  83 #ifndef TSAN_GO
  84 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
  85 #else
  86 # define CPP_STAT_INC(typ) (void)0
  87 #endif
  88
  89 namespace __tsan {
  90
  91 const unsigned kInvalidTid = (unsigned)-1;
  92
  93 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
  94     : tid_(tid)
  95     , reused_(reused + 1) {  // 0 has special meaning
  96   CHECK_LT(tid, kMaxTidInClock);
  97   CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  98   nclk_ = tid_ + 1;
  99   last_acquire_ = 0;
 100   internal_memset(clk_, 0, sizeof(clk_));
 101   clk_[tid_].reused = reused_;
 102 }
 103
 104 void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
 105   DCHECK(nclk_ <= kMaxTid);
 106   DCHECK(src->size_ <= kMaxTid);
 107   CPP_STAT_INC(StatClockAcquire);
 108
 109   // Check if it's empty -> no need to do anything.
 110   const uptr nclk = src->size_;
 111   if (nclk == 0) {
 112     CPP_STAT_INC(StatClockAcquireEmpty);
 113     return;
 114   }
 115
 116   // Check if we've already acquired src after the last release operation on src
 117   bool acquired = false;
 118   if (nclk > tid_) {
 119     CPP_STAT_INC(StatClockAcquireLarge);
 120     if (src->elem(tid_).reused == reused_) {
 121       CPP_STAT_INC(StatClockAcquireRepeat);
 122       for (unsigned i = 0; i < kDirtyTids; i++) {
 123         unsigned tid = src->dirty_tids_[i];
 124         if (tid != kInvalidTid) {
 125           u64 epoch = src->elem(tid).epoch;
 126           if (clk_[tid].epoch < epoch) {
 127             clk_[tid].epoch = epoch;
 128             acquired = true;
 129           }
 130         }
 131       }
 132       if (acquired) {
 133         CPP_STAT_INC(StatClockAcquiredSomething);
 134         last_acquire_ = clk_[tid_].epoch;
 135       }
 136       return;
 137     }
 138   }
 139
 140   // O(N) acquire.
 141   CPP_STAT_INC(StatClockAcquireFull);
 142   nclk_ = max(nclk_, nclk);
 143   for (uptr i = 0; i < nclk; i++) {
 144     u64 epoch = src->elem(i).epoch;
 145     if (clk_[i].epoch < epoch) {
 146       clk_[i].epoch = epoch;
 147       acquired = true;
 148     }
 149   }
 150
 151   // Remember that this thread has acquired this clock.
 152   if (nclk > tid_)
 153     src->elem(tid_).reused = reused_;
 154
 155   if (acquired) {
 156     CPP_STAT_INC(StatClockAcquiredSomething);
 157     last_acquire_ = clk_[tid_].epoch;
 158   }
 159 }
 160
 161 void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
 162   DCHECK_LE(nclk_, kMaxTid);
 163   DCHECK_LE(dst->size_, kMaxTid);
 164
 165   if (dst->size_ == 0) {
 166     // ReleaseStore will correctly set release_store_tid_,
 167     // which can be important for future operations.
 168     ReleaseStore(c, dst);
 169     return;
 170   }
 171
 172   CPP_STAT_INC(StatClockRelease);
 173   // Check if we need to resize dst.
 174   if (dst->size_ < nclk_)
 175     dst->Resize(c, nclk_);
 176
 177   // Check if we had not acquired anything from other threads
 178   // since the last release on dst. If so, we need to update
 179   // only dst->elem(tid_).
 180   if (dst->elem(tid_).epoch > last_acquire_) {
 181     UpdateCurrentThread(dst);
 182     if (dst->release_store_tid_ != tid_ ||
 183         dst->release_store_reused_ != reused_)
 184       dst->release_store_tid_ = kInvalidTid;
 185     return;
 186   }
 187
 188   // O(N) release.
 189   CPP_STAT_INC(StatClockReleaseFull);
 190   // First, remember whether we've acquired dst.
 191   bool acquired = IsAlreadyAcquired(dst);
 192   if (acquired)
 193     CPP_STAT_INC(StatClockReleaseAcquired);
 194   // Update dst->clk_.
 195   for (uptr i = 0; i < nclk_; i++) {
 196     ClockElem &ce = dst->elem(i);
 197     ce.epoch = max(ce.epoch, clk_[i].epoch);
 198     ce.reused = 0;
 199   }
 200   // Clear 'acquired' flag in the remaining elements.
 201   if (nclk_ < dst->size_)
 202     CPP_STAT_INC(StatClockReleaseClearTail);
 203   for (uptr i = nclk_; i < dst->size_; i++)
 204     dst->elem(i).reused = 0;
 205   for (unsigned i = 0; i < kDirtyTids; i++)
 206     dst->dirty_tids_[i] = kInvalidTid;
 207   dst->release_store_tid_ = kInvalidTid;
 208   dst->release_store_reused_ = 0;
 209   // If we've acquired dst, remember this fact,
 210   // so that we don't need to acquire it on next acquire.
 211   if (acquired)
 212     dst->elem(tid_).reused = reused_;
 213 }
 214
 215 void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
 216   DCHECK(nclk_ <= kMaxTid);
 217   DCHECK(dst->size_ <= kMaxTid);
 218   CPP_STAT_INC(StatClockStore);
 219
 220   // Check if we need to resize dst.
 221   if (dst->size_ < nclk_)
 222     dst->Resize(c, nclk_);
 223
 224   if (dst->release_store_tid_ == tid_ &&
 225       dst->release_store_reused_ == reused_ &&
 226       dst->elem(tid_).epoch > last_acquire_) {
 227     CPP_STAT_INC(StatClockStoreFast);
 228     UpdateCurrentThread(dst);
 229     return;
 230   }
 231
 232   // O(N) release-store.
 233   CPP_STAT_INC(StatClockStoreFull);
 234   for (uptr i = 0; i < nclk_; i++) {
 235     ClockElem &ce = dst->elem(i);
 236     ce.epoch = clk_[i].epoch;
 237     ce.reused = 0;
 238   }
 239   // Clear the tail of dst->clk_.
 240   if (nclk_ < dst->size_) {
 241     for (uptr i = nclk_; i < dst->size_; i++) {
 242       ClockElem &ce = dst->elem(i);
 243       ce.epoch = 0;
 244       ce.reused = 0;
 245     }
 246     CPP_STAT_INC(StatClockStoreTail);
 247   }
 248   for (unsigned i = 0; i < kDirtyTids; i++)
 249     dst->dirty_tids_[i] = kInvalidTid;
 250   dst->release_store_tid_ = tid_;
 251   dst->release_store_reused_ = reused_;
 252   // Rememeber that we don't need to acquire it in future.
 253   dst->elem(tid_).reused = reused_;
 254 }
 255
 256 void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
 257   CPP_STAT_INC(StatClockAcquireRelease);
 258   acquire(c, dst);
 259   ReleaseStore(c, dst);
 260 }
 261
 262 // Updates only single element related to the current thread in dst->clk_.
 263 void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
 264   // Update the threads time, but preserve 'acquired' flag.
 265   dst->elem(tid_).epoch = clk_[tid_].epoch;
 266
 267   for (unsigned i = 0; i < kDirtyTids; i++) {
 268     if (dst->dirty_tids_[i] == tid_) {
 269       CPP_STAT_INC(StatClockReleaseFast1);
 270       return;
 271     }
 272     if (dst->dirty_tids_[i] == kInvalidTid) {
 273       CPP_STAT_INC(StatClockReleaseFast2);
 274       dst->dirty_tids_[i] = tid_;
 275       return;
 276     }
 277   }
 278   // Reset all 'acquired' flags, O(N).
 279   CPP_STAT_INC(StatClockReleaseSlow);
 280   for (uptr i = 0; i < dst->size_; i++)
 281     dst->elem(i).reused = 0;
 282   for (unsigned i = 0; i < kDirtyTids; i++)
 283     dst->dirty_tids_[i] = kInvalidTid;
 284 }
 285
 286 // Checks whether the current threads has already acquired src.
 287 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
 288   if (src->elem(tid_).reused != reused_)
 289     return false;
 290   for (unsigned i = 0; i < kDirtyTids; i++) {
 291     unsigned tid = src->dirty_tids_[i];
 292     if (tid != kInvalidTid) {
 293       if (clk_[tid].epoch < src->elem(tid).epoch)
 294         return false;
 295     }
 296   }
 297   return true;
 298 }
 299
 300 void SyncClock::Resize(ClockCache *c, uptr nclk) {
 301   CPP_STAT_INC(StatClockReleaseResize);
 302   if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
 303       RoundUpTo(size_, ClockBlock::kClockCount)) {
 304     // Growing within the same block.
 305     // Memory is already allocated, just increase the size.
 306     size_ = nclk;
 307     return;
 308   }
 309   if (nclk <= ClockBlock::kClockCount) {
 310     // Grow from 0 to one-level table.
 311     CHECK_EQ(size_, 0);
 312     CHECK_EQ(tab_, 0);
 313     CHECK_EQ(tab_idx_, 0);
 314     size_ = nclk;
 315     tab_idx_ = ctx->clock_alloc.Alloc(c);
 316     tab_ = ctx->clock_alloc.Map(tab_idx_);
 317     internal_memset(tab_, 0, sizeof(*tab_));
 318     return;
 319   }
 320   // Growing two-level table.
 321   if (size_ == 0) {
 322     // Allocate first level table.
 323     tab_idx_ = ctx->clock_alloc.Alloc(c);
 324     tab_ = ctx->clock_alloc.Map(tab_idx_);
 325     internal_memset(tab_, 0, sizeof(*tab_));
 326   } else if (size_ <= ClockBlock::kClockCount) {
 327     // Transform one-level table to two-level table.
 328     u32 old = tab_idx_;
 329     tab_idx_ = ctx->clock_alloc.Alloc(c);
 330     tab_ = ctx->clock_alloc.Map(tab_idx_);
 331     internal_memset(tab_, 0, sizeof(*tab_));
 332     tab_->table[0] = old;
 333   }
 334   // At this point we have first level table allocated.
 335   // Add second level tables as necessary.
 336   for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
 337       i < nclk; i += ClockBlock::kClockCount) {
 338     u32 idx = ctx->clock_alloc.Alloc(c);
 339     ClockBlock *cb = ctx->clock_alloc.Map(idx);
 340     internal_memset(cb, 0, sizeof(*cb));
 341     CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
 342     tab_->table[i/ClockBlock::kClockCount] = idx;
 343   }
 344   size_ = nclk;
 345 }
 346
 347 // Sets a single element in the vector clock.
 348 // This function is called only from weird places like AcquireGlobal.
 349 void ThreadClock::set(unsigned tid, u64 v) {
 350   DCHECK_LT(tid, kMaxTid);
 351   DCHECK_GE(v, clk_[tid].epoch);
 352   clk_[tid].epoch = v;
 353   if (nclk_ <= tid)
 354     nclk_ = tid + 1;
 355   last_acquire_ = clk_[tid_].epoch;
 356 }
 357
 358 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
 359   printf("clock=[");
 360   for (uptr i = 0; i < nclk_; i++)
 361     printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
 362   printf("] reused=[");
 363   for (uptr i = 0; i < nclk_; i++)
 364     printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
 365   printf("] tid=%u/%u last_acq=%llu",
 366       tid_, reused_, last_acquire_);
 367 }
 368
 369 SyncClock::SyncClock()
 370     : release_store_tid_(kInvalidTid)
 371     , release_store_reused_()
 372     , tab_()
 373     , tab_idx_()
 374     , size_() {
 375   for (uptr i = 0; i < kDirtyTids; i++)
 376     dirty_tids_[i] = kInvalidTid;
 377 }
 378
 379 SyncClock::~SyncClock() {
 380   // Reset must be called before dtor.
 381   CHECK_EQ(size_, 0);
 382   CHECK_EQ(tab_, 0);
 383   CHECK_EQ(tab_idx_, 0);
 384 }
 385
 386 void SyncClock::Reset(ClockCache *c) {
 387   if (size_ == 0) {
 388     // nothing
 389   } else if (size_ <= ClockBlock::kClockCount) {
 390     // One-level table.
 391     ctx->clock_alloc.Free(c, tab_idx_);
 392   } else {
 393     // Two-level table.
 394     for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
 395       ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
 396     ctx->clock_alloc.Free(c, tab_idx_);
 397   }
 398   tab_ = 0;
 399   tab_idx_ = 0;
 400   size_ = 0;
 401   release_store_tid_ = kInvalidTid;
 402   release_store_reused_ = 0;
 403   for (uptr i = 0; i < kDirtyTids; i++)
 404     dirty_tids_[i] = kInvalidTid;
 405 }
 406
 407 ClockElem &SyncClock::elem(unsigned tid) const {
 408   DCHECK_LT(tid, size_);
 409   if (size_ <= ClockBlock::kClockCount)
 410     return tab_->clock[tid];
 411   u32 idx = tab_->table[tid / ClockBlock::kClockCount];
 412   ClockBlock *cb = ctx->clock_alloc.Map(idx);
 413   return cb->clock[tid % ClockBlock::kClockCount];
 414 }
 415
 416 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
 417   printf("clock=[");
 418   for (uptr i = 0; i < size_; i++)
 419     printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
 420   printf("] reused=[");
 421   for (uptr i = 0; i < size_; i++)
 422     printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
 423   printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
 424       release_store_tid_, release_store_reused_,
 425       dirty_tids_[0], dirty_tids_[1]);
 426 }
 427 }  // namespace __tsan