author | Kostya Serebryany <kcc@google.com> | 2014-05-22 07:09:21 +0000 |
---|---|---|
committer | Kostya Serebryany <kcc@gcc.gnu.org> | 2014-05-22 07:09:21 +0000 |
commit | dee5ea7a0bfe95367820a443ef4a7c813e598b55 (patch) | |
tree | f90afdf42b3ae78508a5c6422f458a5bb0216aa2 /libsanitizer/tsan/tsan_clock.cc | |
parent | b95591361e32a755231d99c348f8a43e2aed0187 (diff) | |
libsanitizer merge from upstream r209283
From-SVN: r210743
Diffstat (limited to 'libsanitizer/tsan/tsan_clock.cc')
-rw-r--r-- | libsanitizer/tsan/tsan_clock.cc | 355 |
1 file changed, 297 insertions, 58 deletions
diff --git a/libsanitizer/tsan/tsan_clock.cc b/libsanitizer/tsan/tsan_clock.cc
index 5d45a5d15fb..b944cc50d54 100644
--- a/libsanitizer/tsan/tsan_clock.cc
+++ b/libsanitizer/tsan/tsan_clock.cc
@@ -11,99 +11,338 @@
 #include "tsan_clock.h"
 #include "tsan_rtl.h"
 
-// It's possible to optimize clock operations for some important cases
-// so that they are O(1). The cases include singletons, once's, local mutexes.
-// First, SyncClock must be re-implemented to allow indexing by tid.
-// It must not necessarily be a full vector clock, though. For example it may
-// be a multi-level table.
-// Then, each slot in SyncClock must contain a dirty bit (it's united with
-// the clock value, so no space increase). The acquire algorithm looks
-// as follows:
-// void acquire(thr, tid, thr_clock, sync_clock) {
-//   if (!sync_clock[tid].dirty)
-//     return;  // No new info to acquire.
-//              // This handles constant reads of singleton pointers and
-//              // stop-flags.
-//   acquire_impl(thr_clock, sync_clock);  // As usual, O(N).
-//   sync_clock[tid].dirty = false;
-//   sync_clock.dirty_count--;
-// }
-// The release operation looks as follows:
-// void release(thr, tid, thr_clock, sync_clock) {
-//   // thr->sync_cache is a simple fixed-size hash-based cache that holds
-//   // several previous sync_clock's.
-//   if (thr->sync_cache[sync_clock] >= thr->last_acquire_epoch) {
-//     // The thread did no acquire operations since last release on this clock.
-//     // So update only the thread's slot (other slots can't possibly change).
-//     sync_clock[tid].clock = thr->epoch;
-//     if (sync_clock.dirty_count == sync_clock.cnt
-//         || (sync_clock.dirty_count == sync_clock.cnt - 1
-//           && sync_clock[tid].dirty == false))
-//       // All dirty flags are set, bail out.
-//       return;
-//     set all dirty bits, but preserve the thread's bit.  // O(N)
-//     update sync_clock.dirty_count;
-//     return;
+// SyncClock and ThreadClock implement vector clocks for sync variables
+// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
+// ThreadClock contains fixed-size vector clock for maximum number of threads.
+// SyncClock contains growable vector clock for currently necessary number of
+// threads.
+// Together they implement very simple model of operations, namely:
+//
+// void ThreadClock::acquire(const SyncClock *src) {
+//   for (int i = 0; i < kMaxThreads; i++)
+//     clock[i] = max(clock[i], src->clock[i]);
+// }
+//
+// void ThreadClock::release(SyncClock *dst) const {
+//   for (int i = 0; i < kMaxThreads; i++)
+//     dst->clock[i] = max(dst->clock[i], clock[i]);
+// }
+//
+// void ThreadClock::ReleaseStore(SyncClock *dst) const {
+//   for (int i = 0; i < kMaxThreads; i++)
+//     dst->clock[i] = clock[i];
+// }
+//
+// void ThreadClock::acq_rel(SyncClock *dst) {
+//   acquire(dst);
+//   release(dst);
 // }
-//   release_impl(thr_clock, sync_clock);  // As usual, O(N).
-//   set all dirty bits, but preserve the thread's bit.
-//   // The previous step is combined with release_impl(), so that
-//   // we scan the arrays only once.
-//   update sync_clock.dirty_count;
-// }
+//
+// Conformance to this model is extensively verified in tsan_clock_test.cc.
+// However, the implementation is significantly more complex. The complexity
+// allows to implement important classes of use cases in O(1) instead of O(N).
+//
+// The use cases are:
+// 1. Singleton/once atomic that has a single release-store operation followed
+//    by zillions of acquire-loads (the acquire-load is O(1)).
+// 2. Thread-local mutex (both lock and unlock can be O(1)).
+// 3. Leaf mutex (unlock is O(1)).
+// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
+// 5. An atomic with a single writer (writes can be O(1)).
+// The implementation dynamically adopts to workload. So if an atomic is in
+// read-only phase, these reads will be O(1); if it later switches to read/write
+// phase, the implementation will correctly handle that by switching to O(N).
+//
+// Thread-safety note: all const operations on SyncClock's are conducted under
+// a shared lock; all non-const operations on SyncClock's are conducted under
+// an exclusive lock; ThreadClock's are private to respective threads and so
+// do not need any protection.
+//
+// Description of ThreadClock state:
+// clk_ - fixed size vector clock.
+// nclk_ - effective size of the vector clock (the rest is zeros).
+// tid_ - index of the thread associated with he clock ("current thread").
+// last_acquire_ - current thread time when it acquired something from
+//   other threads.
+//
+// Description of SyncClock state:
+// clk_ - variable size vector clock, low kClkBits hold timestamp,
+//   the remaining bits hold "acquired" flag (the actual value is thread's
+//   reused counter);
+//   if acquried == thr->reused_, then the respective thread has already
+//   acquired this clock (except possibly dirty_tids_).
+// dirty_tids_ - holds up to two indeces in the vector clock that other threads
+//   need to acquire regardless of "acquired" flag value;
+// release_store_tid_ - denotes that the clock state is a result of
+//   release-store operation by the thread with release_store_tid_ index.
+// release_store_reused_ - reuse count of release_store_tid_.
+
+// We don't have ThreadState in these methods, so this is an ugly hack that
+// works only in C++.
+#ifndef TSAN_GO
+# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
+#else
+# define CPP_STAT_INC(typ) (void)0
+#endif
 
 namespace __tsan {
 
-ThreadClock::ThreadClock() {
-  nclk_ = 0;
-  for (uptr i = 0; i < (uptr)kMaxTidInClock; i++)
-    clk_[i] = 0;
+const unsigned kInvalidTid = (unsigned)-1;
+
+ThreadClock::ThreadClock(unsigned tid, unsigned reused)
+    : tid_(tid)
+    , reused_(reused + 1) {  // 0 has special meaning
+  CHECK_LT(tid, kMaxTidInClock);
+  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
+  nclk_ = tid_ + 1;
+  last_acquire_ = 0;
+  internal_memset(clk_, 0, sizeof(clk_));
+  clk_[tid_].reused = reused_;
 }
 
 void ThreadClock::acquire(const SyncClock *src) {
   DCHECK(nclk_ <= kMaxTid);
   DCHECK(src->clk_.Size() <= kMaxTid);
+  CPP_STAT_INC(StatClockAcquire);
+
+  // Check if it's empty -> no need to do anything.
   const uptr nclk = src->clk_.Size();
-  if (nclk == 0)
+  if (nclk == 0) {
+    CPP_STAT_INC(StatClockAcquireEmpty);
     return;
+  }
+
+  // Check if we've already acquired src after the last release operation on src
+  bool acquired = false;
+  if (nclk > tid_) {
+    CPP_STAT_INC(StatClockAcquireLarge);
+    if (src->clk_[tid_].reused == reused_) {
+      CPP_STAT_INC(StatClockAcquireRepeat);
+      for (unsigned i = 0; i < kDirtyTids; i++) {
+        unsigned tid = src->dirty_tids_[i];
+        if (tid != kInvalidTid) {
+          u64 epoch = src->clk_[tid].epoch;
+          if (clk_[tid].epoch < epoch) {
+            clk_[tid].epoch = epoch;
+            acquired = true;
+          }
+        }
+      }
+      if (acquired) {
+        CPP_STAT_INC(StatClockAcquiredSomething);
+        last_acquire_ = clk_[tid_].epoch;
+      }
+      return;
+    }
+  }
+
+  // O(N) acquire.
+  CPP_STAT_INC(StatClockAcquireFull);
   nclk_ = max(nclk_, nclk);
   for (uptr i = 0; i < nclk; i++) {
-    if (clk_[i] < src->clk_[i])
-      clk_[i] = src->clk_[i];
+    u64 epoch = src->clk_[i].epoch;
+    if (clk_[i].epoch < epoch) {
+      clk_[i].epoch = epoch;
+      acquired = true;
+    }
+  }
+
+  // Remember that this thread has acquired this clock.
+  if (nclk > tid_)
+    src->clk_[tid_].reused = reused_;
+
+  if (acquired) {
+    CPP_STAT_INC(StatClockAcquiredSomething);
+    last_acquire_ = clk_[tid_].epoch;
   }
 }
 
 void ThreadClock::release(SyncClock *dst) const {
-  DCHECK(nclk_ <= kMaxTid);
-  DCHECK(dst->clk_.Size() <= kMaxTid);
+  DCHECK_LE(nclk_, kMaxTid);
+  DCHECK_LE(dst->clk_.Size(), kMaxTid);
 
-  if (dst->clk_.Size() < nclk_)
+  if (dst->clk_.Size() == 0) {
+    // ReleaseStore will correctly set release_store_tid_,
+    // which can be important for future operations.
+    ReleaseStore(dst);
+    return;
+  }
+
+  CPP_STAT_INC(StatClockRelease);
+  // Check if we need to resize dst.
+  if (dst->clk_.Size() < nclk_) {
+    CPP_STAT_INC(StatClockReleaseResize);
     dst->clk_.Resize(nclk_);
+  }
+
+  // Check if we had not acquired anything from other threads
+  // since the last release on dst. If so, we need to update
+  // only dst->clk_[tid_].
+  if (dst->clk_[tid_].epoch > last_acquire_) {
+    UpdateCurrentThread(dst);
+    if (dst->release_store_tid_ != tid_ ||
+        dst->release_store_reused_ != reused_)
+      dst->release_store_tid_ = kInvalidTid;
+    return;
+  }
+
+  // O(N) release.
+  CPP_STAT_INC(StatClockReleaseFull);
+  // First, remember whether we've acquired dst.
+  bool acquired = IsAlreadyAcquired(dst);
+  if (acquired)
+    CPP_STAT_INC(StatClockReleaseAcquired);
+  // Update dst->clk_.
   for (uptr i = 0; i < nclk_; i++) {
-    if (dst->clk_[i] < clk_[i])
-      dst->clk_[i] = clk_[i];
+    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
+    dst->clk_[i].reused = 0;
   }
+  // Clear 'acquired' flag in the remaining elements.
+  if (nclk_ < dst->clk_.Size())
+    CPP_STAT_INC(StatClockReleaseClearTail);
+  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
+    dst->clk_[i].reused = 0;
+  for (unsigned i = 0; i < kDirtyTids; i++)
+    dst->dirty_tids_[i] = kInvalidTid;
+  dst->release_store_tid_ = kInvalidTid;
+  dst->release_store_reused_ = 0;
+  // If we've acquired dst, remember this fact,
+  // so that we don't need to acquire it on next acquire.
+  if (acquired)
+    dst->clk_[tid_].reused = reused_;
 }
 
 void ThreadClock::ReleaseStore(SyncClock *dst) const {
   DCHECK(nclk_ <= kMaxTid);
   DCHECK(dst->clk_.Size() <= kMaxTid);
+  CPP_STAT_INC(StatClockStore);
 
-  if (dst->clk_.Size() < nclk_)
+  // Check if we need to resize dst.
+  if (dst->clk_.Size() < nclk_) {
+    CPP_STAT_INC(StatClockStoreResize);
     dst->clk_.Resize(nclk_);
-  for (uptr i = 0; i < nclk_; i++)
-    dst->clk_[i] = clk_[i];
-  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
-    dst->clk_[i] = 0;
+  }
+
+  if (dst->release_store_tid_ == tid_ &&
+      dst->release_store_reused_ == reused_ &&
+      dst->clk_[tid_].epoch > last_acquire_) {
+    CPP_STAT_INC(StatClockStoreFast);
+    UpdateCurrentThread(dst);
+    return;
+  }
+
+  // O(N) release-store.
+  CPP_STAT_INC(StatClockStoreFull);
+  for (uptr i = 0; i < nclk_; i++) {
+    dst->clk_[i].epoch = clk_[i].epoch;
+    dst->clk_[i].reused = 0;
+  }
+  // Clear the tail of dst->clk_.
+  if (nclk_ < dst->clk_.Size()) {
+    internal_memset(&dst->clk_[nclk_], 0,
+        (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
+    CPP_STAT_INC(StatClockStoreTail);
+  }
+  for (unsigned i = 0; i < kDirtyTids; i++)
+    dst->dirty_tids_[i] = kInvalidTid;
+  dst->release_store_tid_ = tid_;
+  dst->release_store_reused_ = reused_;
+  // Rememeber that we don't need to acquire it in future.
+  dst->clk_[tid_].reused = reused_;
 }
 
 void ThreadClock::acq_rel(SyncClock *dst) {
+  CPP_STAT_INC(StatClockAcquireRelease);
   acquire(dst);
-  release(dst);
+  ReleaseStore(dst);
+}
+
+// Updates only single element related to the current thread in dst->clk_.
+void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
+  // Update the threads time, but preserve 'acquired' flag.
+  dst->clk_[tid_].epoch = clk_[tid_].epoch;
+
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    if (dst->dirty_tids_[i] == tid_) {
+      CPP_STAT_INC(StatClockReleaseFast1);
+      return;
+    }
+    if (dst->dirty_tids_[i] == kInvalidTid) {
+      CPP_STAT_INC(StatClockReleaseFast2);
+      dst->dirty_tids_[i] = tid_;
+      return;
+    }
+  }
+  // Reset all 'acquired' flags, O(N).
+  CPP_STAT_INC(StatClockReleaseSlow);
+  for (uptr i = 0; i < dst->clk_.Size(); i++) {
+    dst->clk_[i].reused = 0;
+  }
+  for (unsigned i = 0; i < kDirtyTids; i++)
+    dst->dirty_tids_[i] = kInvalidTid;
+}
+
+// Checks whether the current threads has already acquired src.
+bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
+  if (src->clk_[tid_].reused != reused_)
+    return false;
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    unsigned tid = src->dirty_tids_[i];
+    if (tid != kInvalidTid) {
+      if (clk_[tid].epoch < src->clk_[tid].epoch)
+        return false;
+    }
+  }
+  return true;
+}
+
+// Sets a single element in the vector clock.
+// This function is called only from weird places like AcquireGlobal.
+void ThreadClock::set(unsigned tid, u64 v) {
+  DCHECK_LT(tid, kMaxTid);
+  DCHECK_GE(v, clk_[tid].epoch);
+  clk_[tid].epoch = v;
+  if (nclk_ <= tid)
+    nclk_ = tid + 1;
+  last_acquire_ = clk_[tid_].epoch;
+}
+
+void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
+  printf("clock=[");
+  for (uptr i = 0; i < nclk_; i++)
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
+  printf("] reused=[");
+  for (uptr i = 0; i < nclk_; i++)
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
+  printf("] tid=%u/%u last_acq=%llu",
+      tid_, reused_, last_acquire_);
 }
 
 SyncClock::SyncClock()
-  : clk_(MBlockClock) {
+    : clk_(MBlockClock) {
+  release_store_tid_ = kInvalidTid;
+  release_store_reused_ = 0;
+  for (uptr i = 0; i < kDirtyTids; i++)
+    dirty_tids_[i] = kInvalidTid;
+}
+
+void SyncClock::Reset() {
+  clk_.Reset();
+  release_store_tid_ = kInvalidTid;
+  release_store_reused_ = 0;
+  for (uptr i = 0; i < kDirtyTids; i++)
+    dirty_tids_[i] = kInvalidTid;
+}
+
+void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
+  printf("clock=[");
+  for (uptr i = 0; i < clk_.Size(); i++)
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
+  printf("] reused=[");
+  for (uptr i = 0; i < clk_.Size(); i++)
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
+  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
+      release_store_tid_, release_store_reused_,
+      dirty_tids_[0], dirty_tids_[1]);
 }
 
 }  // namespace __tsan
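For orientation, the "simple model" that the new header comment describes (acquire = element-wise max pulled into the thread's clock, release = element-wise max pushed into the sync object's clock) can be illustrated with a tiny standalone sketch. Everything below (`VectorClock`, `kThreads`, `main`) is invented for this example and is not TSan's actual `ThreadClock`/`SyncClock`; the real classes in this commit layer the O(1) fast paths built on `reused_` and `dirty_tids_` on top of exactly this behavior.

```cpp
// Minimal sketch of the vector-clock model from the header comment above.
// Illustrative only; TSan's real clocks add O(1) fast paths on top of this.
#include <algorithm>
#include <cassert>
#include <vector>

typedef unsigned long long u64;
const int kThreads = 4;  // small fixed size, for the example only

struct VectorClock {
  std::vector<u64> clk;
  VectorClock() : clk(kThreads, 0) {}

  // acquire: pull src into this clock (element-wise max).
  void Acquire(const VectorClock &src) {
    for (int i = 0; i < kThreads; i++)
      clk[i] = std::max(clk[i], src.clk[i]);
  }
  // release: push this clock into dst (element-wise max).
  void Release(VectorClock *dst) const {
    for (int i = 0; i < kThreads; i++)
      dst->clk[i] = std::max(dst->clk[i], clk[i]);
  }
  // release-store: overwrite dst with this clock.
  void ReleaseStore(VectorClock *dst) const {
    dst->clk = clk;
  }
};

int main() {
  VectorClock t0, t1, sync;  // two thread clocks, one sync-variable clock
  t0.clk[0] = 10;            // thread 0 is at epoch 10
  t0.Release(&sync);         // e.g. mutex unlock by thread 0
  t1.Acquire(sync);          // e.g. mutex lock by thread 1
  // Thread 1 now knows thread 0's epoch: everything thread 0 did before the
  // unlock happens-before everything thread 1 does after the lock.
  assert(t1.clk[0] == 10);
  return 0;
}
```

The release/acquire pair on the same sync clock is what creates the happens-before edge; the point of this change is to keep that semantics (checked against the model in tsan_clock_test.cc) while making the common cases listed in the comment O(1) instead of O(N).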