//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_CLOCK_H
#define TSAN_CLOCK_H

#include "tsan_defs.h"
#include "tsan_dense_alloc.h"

namespace __tsan {

typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;

// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
 public:
  SyncClock();
  ~SyncClock();

  uptr size() const;

  // These are used only in tests.
  u64 get(unsigned tid) const;
  u64 get_clean(unsigned tid) const;

  void Resize(ClockCache *c, uptr nclk);
  void Reset(ClockCache *c);

  void DebugDump(int(*printf)(const char *s, ...));

  // Clock element iterator.
  // Note: it iterates only over the table without regard to dirty entries.
  class Iter {
   public:
    explicit Iter(SyncClock* parent);
    Iter& operator++();
    bool operator!=(const Iter& other);
    ClockElem &operator*();

   private:
    SyncClock *parent_;
    // [pos_, end_) is the current continuous range of clock elements.
    ClockElem *pos_;
    ClockElem *end_;
    int block_;  // Current number of second level block.

    NOINLINE void Next();
  };

  Iter begin();
  Iter end();

 private:
  friend class ThreadClock;
  friend class Iter;
  static const uptr kDirtyTids = 2;

  struct Dirty {
    u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; }
    void set_tid(u32 tid) {
      tid_ = tid == kInvalidTid ? kShortInvalidTid : tid;
    }
    u64 epoch : kClkBits;

   private:
    // Full kInvalidTid won't fit into Dirty::tid.
    static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1;
    u64 tid_ : 64 - kClkBits;  // kInvalidTid if not active
  };

  static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit");

  unsigned release_store_tid_;
  unsigned release_store_reused_;
  Dirty dirty_[kDirtyTids];
  // If size_ is 0, tab_ is nullptr.
  // If size_ <= 64 (kClockCount), tab_ contains a pointer to an array with
  // 64 ClockElem's (ClockBlock::clock).
  // Otherwise, tab_ points to an array with up to 127 u32 elements,
  // each pointing to a second-level 512-byte block with 64 ClockElem's.
  // Unused space in the first level ClockBlock is used to store additional
  // clock elements.
  // The last u32 element in the first level ClockBlock is always used as
  // a reference counter.
  //
  // See the following scheme for details.
  // All memory blocks are 512 bytes (allocated from ClockAlloc).
  // Clock (clk) elements are 64 bits.
  // Idx and ref are 32 bits.
  //
  // tab_
  //  |
  //  \/
  //  +----------------------------------------------------+
  //  | clk128 | clk129 | ...unused... | idx1 | idx0 | ref  |
  //  +----------------------------------------------------+
  //                                      |      |
  //                                      |      \/
  //                                      |      +----------------+
  //                                      |      | clk0 ... clk63 |
  //                                      |      +----------------+
  //                                      \/
  //                                      +------------------+
  //                                      | clk64 ... clk127 |
  //                                      +------------------+
  //
  // A simplified standalone sketch of this two-level layout follows the
  // SyncClock class below.
  //
  // Note: dirty entries, if active, always override what's stored in the clock.
  ClockBlock *tab_;
  u32 tab_idx_;
  u16 size_;
  u16 blocks_;  // Number of second level blocks.

  void Unshare(ClockCache *c);
  bool IsShared() const;
  bool Cachable() const;
  void ResetImpl();
  void FlushDirty();
  uptr capacity() const;
  u32 get_block(uptr bi) const;
  void append_block(u32 idx);
  ClockElem &elem(unsigned tid) const;
};
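
// The sketch below is illustrative only and is not part of the real TSan
// sources: it models, with standalone names, the two-level layout described
// in the comment inside SyncClock above. The placement rule for elements that
// overflow into the first-level block is an assumption inferred from that
// comment and diagram; the authoritative logic lives in tsan_clock.cpp.
namespace clock_layout_example {

const uptr kBlockBytes = 512;                        // every block is 512 bytes
const uptr kClockCount = kBlockBytes / sizeof(u64);  // 64 clock elements/block
const uptr kTableSlots = kBlockBytes / sizeof(u32);  // 128 u32 slots
const uptr kRefSlot = kTableSlots - 1;               // last slot: ref counter
const uptr kMaxBlocks = kTableSlots - 1;             // up to 127 block indices

// Where a given thread id's clock element lives: either in one of the
// second-level blocks, or directly in the unused space at the front of the
// first-level block (clk128, clk129, ... in the diagram above).
struct ElemPos {
  bool in_first_level;  // stored directly in the first-level block?
  uptr block;           // second-level block number (if !in_first_level)
  uptr index;           // element index inside that block
};

inline ElemPos LocateElem(uptr tid, uptr second_level_blocks) {
  ElemPos p = {};
  const uptr block = tid / kClockCount;
  if (block < second_level_blocks) {
    p.in_first_level = false;
    p.block = block;
    p.index = tid % kClockCount;
  } else {
    // Elements past the range covered by second-level blocks are stored in
    // the first-level block itself, starting right after that range.
    p.in_first_level = true;
    p.block = 0;
    p.index = tid - second_level_blocks * kClockCount;
  }
  return p;
}

}  // namespace clock_layout_example
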
// The clock that lives in threads. A minimal standalone model of its
// acquire/release operations follows this class.
class ThreadClock {
 public:
  typedef DenseSlabAllocCache Cache;

  explicit ThreadClock(unsigned tid, unsigned reused = 0);

  u64 get(unsigned tid) const;
  void set(ClockCache *c, unsigned tid, u64 v);
  void set(u64 v);
  void tick();
  uptr size() const;

  void acquire(ClockCache *c, SyncClock *src);
  void releaseStoreAcquire(ClockCache *c, SyncClock *src);
  void release(ClockCache *c, SyncClock *dst);
  void acq_rel(ClockCache *c, SyncClock *dst);
  void ReleaseStore(ClockCache *c, SyncClock *dst);
  void ResetCached(ClockCache *c);
  void NoteGlobalAcquire(u64 v);

  void DebugReset();
  void DebugDump(int(*printf)(const char *s, ...));

 private:
  static const uptr kDirtyTids = SyncClock::kDirtyTids;
  // Index of the thread associated with the clock ("current thread").
  const unsigned tid_;
  const unsigned reused_;  // tid_ reuse count.
  // Current thread time when it acquired something from other threads.
  u64 last_acquire_;

  // Last time another thread has done a global acquire of this thread's clock.
  // It helps to avoid the problem described in:
  // https://github.com/golang/go/issues/39186
  // See test/tsan/java_finalizer2.cpp for a regression test.
  // Note the failure is _extremely_ hard to hit, so if you are trying
  // to reproduce it, you may want to run something like:
  // $ go get golang.org/x/tools/cmd/stress
  // $ stress -p=64 ./a.out
  //
  // The crux of the problem is roughly as follows.
  // A number of O(1) optimizations in the clocks algorithm assume proper
  // transitive cumulative propagation of clock values. The AcquireGlobal
  // operation may produce an inconsistent non-linearizable view of
  // thread clocks. Namely, it may acquire a later value from a thread
  // with a higher ID, but fail to acquire an earlier value from a thread
  // with a lower ID. If a thread that executed AcquireGlobal then releases
  // to a sync clock, it will spoil the sync clock with the inconsistent
  // values. If another thread later releases to the sync clock, the optimized
  // algorithm may break.
  //
  // The exact sequence of events that leads to the failure:
  // - thread 1 executes AcquireGlobal
  // - thread 1 acquires value 1 for thread 2
  // - thread 2 increments clock to 2
  // - thread 2 releases to sync object 1
  // - thread 3 at time 1
  // - thread 3 acquires from sync object 1
  // - thread 3 increments clock to 2
  // - thread 1 acquires value 2 for thread 3
  // - thread 1 releases to sync object 2
  // - sync object 2 clock has 1 for thread 2 and 2 for thread 3
  // - thread 3 releases to sync object 2
  // - thread 3 sees value 2 in the clock for itself
  //   and decides that it has already released to the clock
  //   and did not acquire anything from other threads after that
  //   (the last_acquire_ check in the release operation)
  // - thread 3 does not update the value for thread 2 in the clock from 1 to 2
  // - thread 4 acquires from sync object 2
  // - thread 4 detects a false race with thread 2
  //   as it should have been synchronized with thread 2 up to time 2,
  //   but because of the broken clock it is now synchronized only up to time 1
  //
  // The global_acquire_ value helps to prevent this scenario.
  // Namely, thread 3 will not trust any own clock values up to global_acquire_
  // for the purposes of the last_acquire_ optimization.
  atomic_uint64_t global_acquire_;

  // Cached SyncClock (without dirty entries and release_store_tid_).
  // We reuse it for subsequent store-release operations without intervening
  // acquire operations. Since it is shared (and thus constant), the clock
  // value for the current thread is then stored in dirty entries in the
  // SyncClock. We hold a reference to the table while it is cached here.
  u32 cached_idx_;
  u16 cached_size_;
  u16 cached_blocks_;

  // Number of active elements in the clk_ table (the rest is zeros).
  uptr nclk_;
  u64 clk_[kMaxTidInClock];  // Fixed size vector clock.

  bool IsAlreadyAcquired(const SyncClock *src) const;
  bool HasAcquiredAfterRelease(const SyncClock *dst) const;
  void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};
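
// A minimal standalone model (illustrative only, not the real TSan code) of
// what the ThreadClock operations declared above do in plain vector-clock
// terms; acq_rel is acquire followed by release. The real implementation in
// tsan_clock.cpp layers caching, dirty entries and O(1) fast paths on top of
// this, and all names below are assumptions made for the sketch.
namespace vector_clock_example {

const uptr kExampleThreads = 4;

struct SimpleClock {
  u64 clk[kExampleThreads];
};

// acquire: join the sync-variable clock into the thread clock.
inline void ExampleAcquire(SimpleClock *thr, const SimpleClock *sync) {
  for (uptr i = 0; i < kExampleThreads; i++)
    if (sync->clk[i] > thr->clk[i]) thr->clk[i] = sync->clk[i];
}

// release: join the thread clock into the sync-variable clock.
inline void ExampleRelease(const SimpleClock *thr, SimpleClock *sync) {
  for (uptr i = 0; i < kExampleThreads; i++)
    if (thr->clk[i] > sync->clk[i]) sync->clk[i] = thr->clk[i];
}

// ReleaseStore: overwrite the sync-variable clock with the thread clock.
inline void ExampleReleaseStore(const SimpleClock *thr, SimpleClock *sync) {
  for (uptr i = 0; i < kExampleThreads; i++) sync->clk[i] = thr->clk[i];
}

// tick: advance the calling thread's own component of its clock.
inline void ExampleTick(SimpleClock *thr, unsigned tid) { thr->clk[tid]++; }

}  // namespace vector_clock_example
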
ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
  DCHECK_LT(tid, kMaxTidInClock);
  return clk_[tid];
}

ALWAYS_INLINE void ThreadClock::set(u64 v) {
  DCHECK_GE(v, clk_[tid_]);
  clk_[tid_] = v;
}

ALWAYS_INLINE void ThreadClock::tick() {
  clk_[tid_]++;
}

ALWAYS_INLINE uptr ThreadClock::size() const {
  return nclk_;
}

ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
  // Here we rely on the fact that AcquireGlobal is protected by
  // ThreadRegistryLock, thus only one thread at a time executes it
  // and values passed to this function should not go backwards.
  CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
  atomic_store_relaxed(&global_acquire_, v);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
  return Iter(this);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
  return Iter(nullptr);
}

ALWAYS_INLINE uptr SyncClock::size() const {
  return size_;
}

ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
    : parent_(parent)
    , pos_(nullptr)
    , end_(nullptr)
    , block_(-1) {
  if (parent)
    Next();
}

ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
  pos_++;
  if (UNLIKELY(pos_ >= end_))
    Next();
  return *this;
}

ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
  return parent_ != other.parent_;
}

ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
  return *pos_;
}
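
// Illustrative usage of the SyncClock iterator defined above (not part of the
// real TSan sources). It assumes ClockElem (declared in tsan_defs.h) exposes
// an epoch field. Dirty entries are not visited, as noted in the Iter comment.
inline u64 ExampleMaxEpoch(SyncClock *sc) {
  u64 max_epoch = 0;
  // Range-for uses SyncClock::begin()/end() and Iter's operators.
  for (ClockElem &ce : *sc) {
    if (ce.epoch > max_epoch)
      max_epoch = ce.epoch;
  }
  return max_epoch;
}
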
}  // namespace __tsan

#endif  // TSAN_CLOCK_H