//===-- tsan_clock.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"
// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads. SyncClock contains a growable vector clock for the currently
// necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::releaseStoreAcquire(SyncClock *sc) const {
//     for (int i = 0; i < kMaxThreads; i++) {
//       tmp = clock[i];
//       clock[i] = max(clock[i], sc->clock[i]);
//       sc->clock[i] = tmp;
//     }
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
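// As a concrete illustration of the model above (illustrative only, not part
// of the implementation): if thread 0 has clock [5,0] and thread 1 has clock
// [0,3], then after thread 0 releases to a sync variable m (m.clock becomes
// [5,0]) and thread 1 acquires m, thread 1's clock becomes
// max([0,3], [5,0]) = [5,3], i.e. everything thread 1 does from then on
// happens-after thread 0's first five epochs.
//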
// Conformance to this model is extensively verified in tsan_clock_test.cpp.
// However, the implementation is significantly more complex. The complexity
// allows handling important classes of use cases in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is the thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#if !SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

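// The first level ClockBlock stores its reference counter in
// table[ClockBlock::kRefIdx]; ref_ptr returns it as an atomic.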
static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
  return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
}

// Drop reference to the first level block idx.
static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  atomic_uint32_t *ref = ref_ptr(cb);
  u32 v = atomic_load(ref, memory_order_acquire);
  for (;;) {
    CHECK_GT(v, 0);
    if (v == 1)
      break;
    if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
      return;
  }
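  // A failed CAS reloads v with the current count and the loop retries;
  // reaching this point means we held the last reference.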
  // The first level block owns the second level blocks, so free them as well.
  for (uptr i = 0; i < blocks; i++)
    ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
  ctx->clock_alloc.Free(c, idx);
}

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1)  // 0 has special meaning
    , last_acquire_()
    , global_acquire_()
    , cached_idx_()
    , cached_size_()
    , cached_blocks_() {
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  internal_memset(clk_, 0, sizeof(clk_));
}

void ThreadClock::ResetCached(ClockCache *c) {
  if (cached_idx_) {
    UnrefClockBlock(c, cached_idx_, cached_blocks_);
    cached_idx_ = 0;
    cached_size_ = 0;
    cached_blocks_ = 0;
  }
}

void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }
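
  // First fold in the dirty entries: per the state description at the top of
  // the file, these must be acquired even if this thread has already acquired
  // the main body of the clock.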
  bool acquired = false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    unsigned tid = dirty.tid;
    if (tid != kInvalidTid) {
      if (clk_[tid] < dirty.epoch) {
        clk_[tid] = dirty.epoch;
        acquired = true;
      }
    }
  }

  // Check if we've already acquired src after the last release operation on src
  if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
    // O(N) acquire.
    CPP_STAT_INC(StatClockAcquireFull);
    nclk_ = max(nclk_, nclk);
    u64 *dst_pos = &clk_[0];
    for (ClockElem &src_elem : *src) {
      u64 epoch = src_elem.epoch;
      if (*dst_pos < epoch) {
        *dst_pos = epoch;
        acquired = true;
      }
      dst_pos++;
    }

    // Remember that this thread has acquired this clock.
    if (nclk > tid_)
      src->elem(tid_).reused = reused_;
  }

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(sc->size_, kMaxTid);

  if (sc->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, sc);
    return;
  }

  nclk_ = max(nclk_, (uptr)sc->size_);

  // Check if we need to resize sc.
  if (sc->size_ < nclk_)
    sc->Resize(c, nclk_);

  bool acquired = false;
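
  // What follows is the swap from the model comment: unshare sc (shared
  // clocks are immutable), flush its dirty entries, then exchange it
  // element-wise with this thread's clock, acquiring the old values while
  // release-storing the new ones.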
  sc->Unshare(c);
  sc->FlushDirty();
  uptr i = 0;
  for (ClockElem &ce : *sc) {
    u64 tmp = clk_[i];
    if (clk_[i] < ce.epoch) {
      clk_[i] = ce.epoch;
      acquired = true;
    }
    ce.epoch = tmp;
    ce.reused = 0;
    i++;
  }
  sc->release_store_tid_ = kInvalidTid;
  sc->release_store_reused_ = 0;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::release(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->elem(tid_).
  if (!HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  dst->Unshare(c);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = max(ce.epoch, clk_[i]);
    ce.reused = 0;
    i++;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  if (dst->size_ == 0 && cached_idx_ != 0) {
    // Reuse the cached clock.
    // Note: we could reuse/cache the cached clock in more cases:
    // we could update the existing clock and cache it, or replace it with the
    // currently cached clock and release the old one. And for a shared
    // existing clock, we could replace it with the currently cached;
    // or unshare, update and cache. But, for simplicity, we currently reuse
    // the cached clock only when the target clock is empty.
    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
    dst->tab_idx_ = cached_idx_;
    dst->size_ = cached_size_;
    dst->blocks_ = cached_blocks_;
    CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);
    // The cached clock is shared (immutable),
    // so this is where we store the current clock.
    dst->dirty_[0].tid = tid_;
    dst->dirty_[0].epoch = clk_[tid_];
    dst->release_store_tid_ = tid_;
    dst->release_store_reused_ = reused_;
    // Remember that we don't need to acquire it in future.
    dst->elem(tid_).reused = reused_;
    // Grab a reference.
    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      !HasAcquiredAfterRelease(dst)) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(c, dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  dst->Unshare(c);
  // Note: dst can be larger than this ThreadClock.
  // This is fine since clk_ beyond size is all zeros.
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = clk_[i];
    ce.reused = 0;
    i++;
  }
  for (uptr i = 0; i < kDirtyTids; i++)
    dst->dirty_[i].tid = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->elem(tid_).reused = reused_;

  // If the resulting clock is cachable, cache it for future release operations.
  // The clock is always cachable if we released to an empty sync object.
  if (cached_idx_ == 0 && dst->Cachable()) {
    // Grab a reference to the ClockBlock.
    atomic_uint32_t *ref = ref_ptr(dst->tab_);
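    // When the count is exactly 1, this thread is the sole owner of the
    // block, so a plain relaxed store (1 -> 2) suffices, which avoids an
    // atomic RMW; otherwise fall back to fetch_add.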
    if (atomic_load(ref, memory_order_acquire) == 1)
      atomic_store_relaxed(ref, 2);
    else
      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    cached_idx_ = dst->tab_idx_;
    cached_size_ = dst->size_;
    cached_blocks_ = dst->blocks_;
  }
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only a single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
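  // Fast path: stash the update into one of the dirty slots - either the
  // slot already holding this thread's entry or a currently unused one.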
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty *dirty = &dst->dirty_[i];
    const unsigned tid = dirty->tid;
    if (tid == tid_ || tid == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast);
      dirty->tid = tid_;
      dirty->epoch = clk_[tid_];
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  // We are going to touch dst elements, so we need to unshare it.
  dst->Unshare(c);
  CPP_STAT_INC(StatClockReleaseSlow);
  dst->elem(tid_).epoch = clk_[tid_];
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  dst->FlushDirty();
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
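  // True only if our 'acquired' stamp in src is current and no dirty entry
  // is ahead of our clock.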
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    if (dirty.tid != kInvalidTid) {
      if (clk_[dirty.tid] < dirty.epoch)
        return false;
    }
  }
  return true;
}

// Checks whether the current thread has acquired anything
// from other clocks after releasing to dst (directly or indirectly).
bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
  const u64 my_epoch = dst->elem(tid_).epoch;
  return my_epoch <= last_acquire_ ||
      my_epoch <= atomic_load_relaxed(&global_acquire_);
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid]);
  clk_[tid] = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}

SyncClock::SyncClock() {
  ResetImpl();
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(blocks_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_)
    UnrefClockBlock(c, tab_idx_, blocks_);
  ResetImpl();
}

void SyncClock::ResetImpl() {
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  blocks_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_[i].tid = kInvalidTid;
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  Unshare(c);
  if (nclk <= capacity()) {
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (size_ == 0) {
    // Grow from 0 to one-level table.
    CHECK_EQ(blocks_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    atomic_store_relaxed(ref_ptr(tab_), 1);
    size_ = 1;
  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
    CHECK_LT(top, ClockBlock::kClockCount);
    const uptr move = top * sizeof(tab_->clock[0]);
    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
    internal_memset(tab_->clock, 0, move);
    append_block(idx);
  }
  // At this point we have the first level table allocated and all clock
  // elements are evacuated from it to a second level block.
  // Add second level tables as necessary.
  while (nclk > capacity()) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    append_block(idx);
  }
  size_ = nclk;
}

// Flushes all dirty elements into the main clock array.
void SyncClock::FlushDirty() {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty *dirty = &dirty_[i];
    if (dirty->tid != kInvalidTid) {
      CHECK_LT(dirty->tid, size_);
      elem(dirty->tid).epoch = dirty->epoch;
      dirty->tid = kInvalidTid;
    }
  }
}

bool SyncClock::IsShared() const {
  if (size_ == 0)
    return false;
  atomic_uint32_t *ref = ref_ptr(tab_);
  u32 v = atomic_load(ref, memory_order_acquire);
  CHECK_GT(v, 0);
  return v > 1;
}

// Unshares the current clock if it's shared.
// Shared clocks are immutable, so they need to be unshared before any updates.
// Note: this does not apply to dirty entries as they are not shared.
void SyncClock::Unshare(ClockCache *c) {
  if (!IsShared())
    return;
  // First, copy current state into old.
  SyncClock old;
  old.tab_ = tab_;
  old.tab_idx_ = tab_idx_;
  old.size_ = size_;
  old.blocks_ = blocks_;
  old.release_store_tid_ = release_store_tid_;
  old.release_store_reused_ = release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    old.dirty_[i] = dirty_[i];
  // Then, clear current object.
  ResetImpl();
  // Allocate brand new clock in the current object.
  Resize(c, old.size_);
  // Now copy state back into this object.
  Iter old_iter(&old);
  for (ClockElem &ce : *this) {
    ce = *old_iter;
    ++old_iter;
  }
  release_store_tid_ = old.release_store_tid_;
  release_store_reused_ = old.release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dirty_[i] = old.dirty_[i];
  // Drop reference to old and delete if necessary.
  old.Reset(c);
}

// Can we cache this clock for future release operations?
ALWAYS_INLINE bool SyncClock::Cachable() const {
  if (size_ == 0)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dirty_[i].tid != kInvalidTid)
      return false;
  }
  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
}

// elem linearizes the two-level structure into a linear array.
// Note: this is used only for one-time accesses; vector operations use
// the iterator as it is much faster.
ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
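  // Elements [0, blocks_ * kClockCount) live in second level blocks, in
  // order; any remaining elements live in the first level block itself.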
  DCHECK_LT(tid, size_);
  const uptr block = tid / ClockBlock::kClockCount;
  DCHECK_LE(block, blocks_);
  tid %= ClockBlock::kClockCount;
  if (block == blocks_)
    return tab_->clock[tid];
  u32 idx = get_block(block);
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid];
}

ALWAYS_INLINE uptr SyncClock::capacity() const {
  if (size_ == 0)
    return 0;
  uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
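  // ratio is how many table[] slots (block indices or the ref count) fit in
  // the space of one clock element.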
  // How many clock elements we can fit into the first level block.
  // +1 for ref counter.
  uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
  return blocks_ * ClockBlock::kClockCount + top;
}

ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
  DCHECK(size_);
  DCHECK_LT(bi, blocks_);
  return tab_->table[ClockBlock::kBlockIdx - bi];
}

ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
  uptr bi = blocks_++;
  CHECK_EQ(get_block(bi), 0);
  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
}

// Used only by tests.
u64 SyncClock::get(unsigned tid) const {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty dirty = dirty_[i];
    if (dirty.tid == tid)
      return dirty.epoch;
  }
  return elem(tid).epoch;
}

// Used only by Iter test.
u64 SyncClock::get_clean(unsigned tid) const {
  return elem(tid).epoch;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
      release_store_tid_, release_store_reused_,
      dirty_[0].tid, dirty_[0].epoch,
      dirty_[1].tid, dirty_[1].epoch);
}

void SyncClock::Iter::Next() {
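  // Iteration order matches elem(): second level blocks in order first,
  // then the elements remaining in the first level block.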
  // Finished with the current block, move on to the next one.
  block_++;
  if (block_ < parent_->blocks_) {
    // Iterate over the next second level block.
    u32 idx = parent_->get_block(block_);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    pos_ = &cb->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  if (block_ == parent_->blocks_ &&
      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
    // Iterate over elements in the first level block.
    pos_ = &parent_->tab_->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  parent_ = nullptr;  // denotes end
}

}  // namespace __tsan