libsanitizer/tsan/tsan_clock.cc

   1 //===-- tsan_clock.cc -----------------------------------------------------===//
   2 //
   3 // This file is distributed under the University of Illinois Open Source
   4 // License. See LICENSE.TXT for details.
   5 //
   6 //===----------------------------------------------------------------------===//
   7 //
   8 // This file is a part of ThreadSanitizer (TSan), a race detector.
   9 //
  10 //===----------------------------------------------------------------------===//
  11 #include "tsan_clock.h"
  12 #include "tsan_rtl.h"
  13 #include "sanitizer_common/sanitizer_placement_new.h"
  14
  15 // SyncClock and ThreadClock implement vector clocks for sync variables
  16 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
  17 // ThreadClock contains fixed-size vector clock for maximum number of threads.
  18 // SyncClock contains growable vector clock for currently necessary number of
  19 // threads.
  20 // Together they implement very simple model of operations, namely:
  21 //
  22 //   void ThreadClock::acquire(const SyncClock *src) {
  23 //     for (int i = 0; i < kMaxThreads; i++)
  24 //       clock[i] = max(clock[i], src->clock[i]);
  25 //   }
  26 //
  27 //   void ThreadClock::release(SyncClock *dst) const {
  28 //     for (int i = 0; i < kMaxThreads; i++)
  29 //       dst->clock[i] = max(dst->clock[i], clock[i]);
  30 //   }
  31 //
  32 //   void ThreadClock::ReleaseStore(SyncClock *dst) const {
  33 //     for (int i = 0; i < kMaxThreads; i++)
  34 //       dst->clock[i] = clock[i];
  35 //   }
  36 //
  37 //   void ThreadClock::acq_rel(SyncClock *dst) {
  38 //     acquire(dst);
  39 //     release(dst);
  40 //   }
  41 //
  42 // Conformance to this model is extensively verified in tsan_clock_test.cc.
  43 // However, the implementation is significantly more complex. The complexity
  44 // allows to implement important classes of use cases in O(1) instead of O(N).
  45 //
  46 // The use cases are:
  47 // 1. Singleton/once atomic that has a single release-store operation followed
  48 //    by zillions of acquire-loads (the acquire-load is O(1)).
  49 // 2. Thread-local mutex (both lock and unlock can be O(1)).
  50 // 3. Leaf mutex (unlock is O(1)).
  51 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
  52 // 5. An atomic with a single writer (writes can be O(1)).
   53 // The implementation dynamically adapts to the workload. So if an atomic is in
  54 // read-only phase, these reads will be O(1); if it later switches to read/write
  55 // phase, the implementation will correctly handle that by switching to O(N).
  56 //
  57 // Thread-safety note: all const operations on SyncClock's are conducted under
  58 // a shared lock; all non-const operations on SyncClock's are conducted under
  59 // an exclusive lock; ThreadClock's are private to respective threads and so
  60 // do not need any protection.
  61 //
  62 // Description of ThreadClock state:
  63 // clk_ - fixed size vector clock.
  64 // nclk_ - effective size of the vector clock (the rest is zeros).
   65 // tid_ - index of the thread associated with the clock ("current thread").
  66 // last_acquire_ - current thread time when it acquired something from
  67 //   other threads.
  68 //
  69 // Description of SyncClock state:
  70 // clk_ - variable size vector clock, low kClkBits hold timestamp,
  71 //   the remaining bits hold "acquired" flag (the actual value is thread's
  72 //   reused counter);
   73 //   if acquired == thr->reused_, then the respective thread has already
  74 //   acquired this clock (except possibly dirty_tids_).
   75 // dirty_tids_ - holds up to two indices in the vector clock that other threads
  76 //   need to acquire regardless of "acquired" flag value;
  77 // release_store_tid_ - denotes that the clock state is a result of
  78 //   release-store operation by the thread with release_store_tid_ index.
  79 // release_store_reused_ - reuse count of release_store_tid_.
  80
// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
// CPP_STAT_INC(typ) forwards to StatInc() for the thread returned by
// cur_thread() when SANITIZER_GO is not set, and expands to a no-op
// expression otherwise.
#if !SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif
  88
  89 namespace __tsan {
  90
  91 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
  92     : tid_(tid)
  93     , reused_(reused + 1) {  // 0 has special meaning
  94   CHECK_LT(tid, kMaxTidInClock);
  95   CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  96   nclk_ = tid_ + 1;
  97   last_acquire_ = 0;
  98   internal_memset(clk_, 0, sizeof(clk_));
  99   clk_[tid_].reused = reused_;
 100 }
 101
 102 void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
 103   DCHECK_LE(nclk_, kMaxTid);
 104   DCHECK_LE(src->size_, kMaxTid);
 105   CPP_STAT_INC(StatClockAcquire);
 106
 107   // Check if it's empty -> no need to do anything.
 108   const uptr nclk = src->size_;
 109   if (nclk == 0) {
 110     CPP_STAT_INC(StatClockAcquireEmpty);
 111     return;
 112   }
 113
 114   // Check if we've already acquired src after the last release operation on src
 115   bool acquired = false;
 116   if (nclk > tid_) {
 117     CPP_STAT_INC(StatClockAcquireLarge);
 118     if (src->elem(tid_).reused == reused_) {
 119       CPP_STAT_INC(StatClockAcquireRepeat);
 120       for (unsigned i = 0; i < kDirtyTids; i++) {
 121         unsigned tid = src->dirty_tids_[i];
 122         if (tid != kInvalidTid) {
 123           u64 epoch = src->elem(tid).epoch;
 124           if (clk_[tid].epoch < epoch) {
 125             clk_[tid].epoch = epoch;
 126             acquired = true;
 127           }
 128         }
 129       }
 130       if (acquired) {
 131         CPP_STAT_INC(StatClockAcquiredSomething);
 132         last_acquire_ = clk_[tid_].epoch;
 133       }
 134       return;
 135     }
 136   }
 137
 138   // O(N) acquire.
 139   CPP_STAT_INC(StatClockAcquireFull);
 140   nclk_ = max(nclk_, nclk);
 141   for (uptr i = 0; i < nclk; i++) {
 142     u64 epoch = src->elem(i).epoch;
 143     if (clk_[i].epoch < epoch) {
 144       clk_[i].epoch = epoch;
 145       acquired = true;
 146     }
 147   }
 148
 149   // Remember that this thread has acquired this clock.
 150   if (nclk > tid_)
 151     src->elem(tid_).reused = reused_;
 152
 153   if (acquired) {
 154     CPP_STAT_INC(StatClockAcquiredSomething);
 155     last_acquire_ = clk_[tid_].epoch;
 156   }
 157 }
 158
 159 void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
 160   DCHECK_LE(nclk_, kMaxTid);
 161   DCHECK_LE(dst->size_, kMaxTid);
 162
 163   if (dst->size_ == 0) {
 164     // ReleaseStore will correctly set release_store_tid_,
 165     // which can be important for future operations.
 166     ReleaseStore(c, dst);
 167     return;
 168   }
 169
 170   CPP_STAT_INC(StatClockRelease);
 171   // Check if we need to resize dst.
 172   if (dst->size_ < nclk_)
 173     dst->Resize(c, nclk_);
 174
 175   // Check if we had not acquired anything from other threads
 176   // since the last release on dst. If so, we need to update
 177   // only dst->elem(tid_).
 178   if (dst->elem(tid_).epoch > last_acquire_) {
 179     UpdateCurrentThread(dst);
 180     if (dst->release_store_tid_ != tid_ ||
 181         dst->release_store_reused_ != reused_)
 182       dst->release_store_tid_ = kInvalidTid;
 183     return;
 184   }
 185
 186   // O(N) release.
 187   CPP_STAT_INC(StatClockReleaseFull);
 188   // First, remember whether we've acquired dst.
 189   bool acquired = IsAlreadyAcquired(dst);
 190   if (acquired)
 191     CPP_STAT_INC(StatClockReleaseAcquired);
 192   // Update dst->clk_.
 193   for (uptr i = 0; i < nclk_; i++) {
 194     ClockElem &ce = dst->elem(i);
 195     ce.epoch = max(ce.epoch, clk_[i].epoch);
 196     ce.reused = 0;
 197   }
 198   // Clear 'acquired' flag in the remaining elements.
 199   if (nclk_ < dst->size_)
 200     CPP_STAT_INC(StatClockReleaseClearTail);
 201   for (uptr i = nclk_; i < dst->size_; i++)
 202     dst->elem(i).reused = 0;
 203   for (unsigned i = 0; i < kDirtyTids; i++)
 204     dst->dirty_tids_[i] = kInvalidTid;
 205   dst->release_store_tid_ = kInvalidTid;
 206   dst->release_store_reused_ = 0;
 207   // If we've acquired dst, remember this fact,
 208   // so that we don't need to acquire it on next acquire.
 209   if (acquired)
 210     dst->elem(tid_).reused = reused_;
 211 }
 212
 213 void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
 214   DCHECK_LE(nclk_, kMaxTid);
 215   DCHECK_LE(dst->size_, kMaxTid);
 216   CPP_STAT_INC(StatClockStore);
 217
 218   // Check if we need to resize dst.
 219   if (dst->size_ < nclk_)
 220     dst->Resize(c, nclk_);
 221
 222   if (dst->release_store_tid_ == tid_ &&
 223       dst->release_store_reused_ == reused_ &&
 224       dst->elem(tid_).epoch > last_acquire_) {
 225     CPP_STAT_INC(StatClockStoreFast);
 226     UpdateCurrentThread(dst);
 227     return;
 228   }
 229
 230   // O(N) release-store.
 231   CPP_STAT_INC(StatClockStoreFull);
 232   for (uptr i = 0; i < nclk_; i++) {
 233     ClockElem &ce = dst->elem(i);
 234     ce.epoch = clk_[i].epoch;
 235     ce.reused = 0;
 236   }
 237   // Clear the tail of dst->clk_.
 238   if (nclk_ < dst->size_) {
 239     for (uptr i = nclk_; i < dst->size_; i++) {
 240       ClockElem &ce = dst->elem(i);
 241       ce.epoch = 0;
 242       ce.reused = 0;
 243     }
 244     CPP_STAT_INC(StatClockStoreTail);
 245   }
 246   for (unsigned i = 0; i < kDirtyTids; i++)
 247     dst->dirty_tids_[i] = kInvalidTid;
 248   dst->release_store_tid_ = tid_;
 249   dst->release_store_reused_ = reused_;
 250   // Rememeber that we don't need to acquire it in future.
 251   dst->elem(tid_).reused = reused_;
 252 }
 253
// Acquire-release on dst: first merges dst into this thread's clock, then
// writes the thread's clock back into dst with ReleaseStore.
// NOTE(review): the model in the file header spells this as acquire +
// release; presumably ReleaseStore is equivalent here because the preceding
// acquire leaves this clock element-wise >= dst -- confirm.
void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}
 259
 260 // Updates only single element related to the current thread in dst->clk_.
 261 void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
 262   // Update the threads time, but preserve 'acquired' flag.
 263   dst->elem(tid_).epoch = clk_[tid_].epoch;
 264
 265   for (unsigned i = 0; i < kDirtyTids; i++) {
 266     if (dst->dirty_tids_[i] == tid_) {
 267       CPP_STAT_INC(StatClockReleaseFast1);
 268       return;
 269     }
 270     if (dst->dirty_tids_[i] == kInvalidTid) {
 271       CPP_STAT_INC(StatClockReleaseFast2);
 272       dst->dirty_tids_[i] = tid_;
 273       return;
 274     }
 275   }
 276   // Reset all 'acquired' flags, O(N).
 277   CPP_STAT_INC(StatClockReleaseSlow);
 278   for (uptr i = 0; i < dst->size_; i++)
 279     dst->elem(i).reused = 0;
 280   for (unsigned i = 0; i < kDirtyTids; i++)
 281     dst->dirty_tids_[i] = kInvalidTid;
 282 }
 283
 284 // Checks whether the current threads has already acquired src.
 285 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
 286   if (src->elem(tid_).reused != reused_)
 287     return false;
 288   for (unsigned i = 0; i < kDirtyTids; i++) {
 289     unsigned tid = src->dirty_tids_[i];
 290     if (tid != kInvalidTid) {
 291       if (clk_[tid].epoch < src->elem(tid).epoch)
 292         return false;
 293     }
 294   }
 295   return true;
 296 }
 297
 298 void SyncClock::Resize(ClockCache *c, uptr nclk) {
 299   CPP_STAT_INC(StatClockReleaseResize);
 300   if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
 301       RoundUpTo(size_, ClockBlock::kClockCount)) {
 302     // Growing within the same block.
 303     // Memory is already allocated, just increase the size.
 304     size_ = nclk;
 305     return;
 306   }
 307   if (nclk <= ClockBlock::kClockCount) {
 308     // Grow from 0 to one-level table.
 309     CHECK_EQ(size_, 0);
 310     CHECK_EQ(tab_, 0);
 311     CHECK_EQ(tab_idx_, 0);
 312     size_ = nclk;
 313     tab_idx_ = ctx->clock_alloc.Alloc(c);
 314     tab_ = ctx->clock_alloc.Map(tab_idx_);
 315     internal_memset(tab_, 0, sizeof(*tab_));
 316     return;
 317   }
 318   // Growing two-level table.
 319   if (size_ == 0) {
 320     // Allocate first level table.
 321     tab_idx_ = ctx->clock_alloc.Alloc(c);
 322     tab_ = ctx->clock_alloc.Map(tab_idx_);
 323     internal_memset(tab_, 0, sizeof(*tab_));
 324   } else if (size_ <= ClockBlock::kClockCount) {
 325     // Transform one-level table to two-level table.
 326     u32 old = tab_idx_;
 327     tab_idx_ = ctx->clock_alloc.Alloc(c);
 328     tab_ = ctx->clock_alloc.Map(tab_idx_);
 329     internal_memset(tab_, 0, sizeof(*tab_));
 330     tab_->table[0] = old;
 331   }
 332   // At this point we have first level table allocated.
 333   // Add second level tables as necessary.
 334   for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
 335       i < nclk; i += ClockBlock::kClockCount) {
 336     u32 idx = ctx->clock_alloc.Alloc(c);
 337     ClockBlock *cb = ctx->clock_alloc.Map(idx);
 338     internal_memset(cb, 0, sizeof(*cb));
 339     CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
 340     tab_->table[i/ClockBlock::kClockCount] = idx;
 341   }
 342   size_ = nclk;
 343 }
 344
 345 // Sets a single element in the vector clock.
 346 // This function is called only from weird places like AcquireGlobal.
 347 void ThreadClock::set(unsigned tid, u64 v) {
 348   DCHECK_LT(tid, kMaxTid);
 349   DCHECK_GE(v, clk_[tid].epoch);
 350   clk_[tid].epoch = v;
 351   if (nclk_ <= tid)
 352     nclk_ = tid + 1;
 353   last_acquire_ = clk_[tid_].epoch;
 354 }
 355
 356 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
 357   printf("clock=[");
 358   for (uptr i = 0; i < nclk_; i++)
 359     printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
 360   printf("] reused=[");
 361   for (uptr i = 0; i < nclk_; i++)
 362     printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
 363   printf("] tid=%u/%u last_acq=%llu",
 364       tid_, reused_, last_acquire_);
 365 }
 366
 367 SyncClock::SyncClock()
 368     : release_store_tid_(kInvalidTid)
 369     , release_store_reused_()
 370     , tab_()
 371     , tab_idx_()
 372     , size_() {
 373   for (uptr i = 0; i < kDirtyTids; i++)
 374     dirty_tids_[i] = kInvalidTid;
 375 }
 376
// Verifies that the clock no longer holds any storage; the destructor itself
// frees nothing.
SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}
 383
 384 void SyncClock::Reset(ClockCache *c) {
 385   if (size_ == 0) {
 386     // nothing
 387   } else if (size_ <= ClockBlock::kClockCount) {
 388     // One-level table.
 389     ctx->clock_alloc.Free(c, tab_idx_);
 390   } else {
 391     // Two-level table.
 392     for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
 393       ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
 394     ctx->clock_alloc.Free(c, tab_idx_);
 395   }
 396   tab_ = 0;
 397   tab_idx_ = 0;
 398   size_ = 0;
 399   release_store_tid_ = kInvalidTid;
 400   release_store_reused_ = 0;
 401   for (uptr i = 0; i < kDirtyTids; i++)
 402     dirty_tids_[i] = kInvalidTid;
 403 }
 404
 405 ClockElem &SyncClock::elem(unsigned tid) const {
 406   DCHECK_LT(tid, size_);
 407   if (size_ <= ClockBlock::kClockCount)
 408     return tab_->clock[tid];
 409   u32 idx = tab_->table[tid / ClockBlock::kClockCount];
 410   ClockBlock *cb = ctx->clock_alloc.Map(idx);
 411   return cb->clock[tid % ClockBlock::kClockCount];
 412 }
 413
 414 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
 415   printf("clock=[");
 416   for (uptr i = 0; i < size_; i++)
 417     printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
 418   printf("] reused=[");
 419   for (uptr i = 0; i < size_; i++)
 420     printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
 421   printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
 422       release_store_tid_, release_store_reused_,
 423       dirty_tids_[0], dirty_tids_[1]);
 424 }
 425 }  // namespace __tsan