path: root/libsanitizer/tsan/tsan_clock.cc
author    Kostya Serebryany <kcc@google.com>    2014-05-22 07:09:21 +0000
committer Kostya Serebryany <kcc@gcc.gnu.org>  2014-05-22 07:09:21 +0000
commit    dee5ea7a0bfe95367820a443ef4a7c813e598b55 (patch)
tree      f90afdf42b3ae78508a5c6422f458a5bb0216aa2 /libsanitizer/tsan/tsan_clock.cc
parent    b95591361e32a755231d99c348f8a43e2aed0187 (diff)
libsanitizer merge from upstream r209283
From-SVN: r210743
Diffstat (limited to 'libsanitizer/tsan/tsan_clock.cc')
-rw-r--r--  libsanitizer/tsan/tsan_clock.cc | 355
1 file changed, 297 insertions(+), 58 deletions(-)
diff --git a/libsanitizer/tsan/tsan_clock.cc b/libsanitizer/tsan/tsan_clock.cc
index 5d45a5d15fb..b944cc50d54 100644
--- a/libsanitizer/tsan/tsan_clock.cc
+++ b/libsanitizer/tsan/tsan_clock.cc
@@ -11,99 +11,338 @@
#include "tsan_clock.h"
#include "tsan_rtl.h"
-// It's possible to optimize clock operations for some important cases
-// so that they are O(1). The cases include singletons, once's, local mutexes.
-// First, SyncClock must be re-implemented to allow indexing by tid.
-// It must not necessarily be a full vector clock, though. For example it may
-// be a multi-level table.
-// Then, each slot in SyncClock must contain a dirty bit (it's united with
-// the clock value, so no space increase). The acquire algorithm looks
-// as follows:
-// void acquire(thr, tid, thr_clock, sync_clock) {
-// if (!sync_clock[tid].dirty)
-// return; // No new info to acquire.
-// // This handles constant reads of singleton pointers and
-// // stop-flags.
-// acquire_impl(thr_clock, sync_clock); // As usual, O(N).
-// sync_clock[tid].dirty = false;
-// sync_clock.dirty_count--;
-// }
-// The release operation looks as follows:
-// void release(thr, tid, thr_clock, sync_clock) {
-// // thr->sync_cache is a simple fixed-size hash-based cache that holds
-// // several previous sync_clock's.
-// if (thr->sync_cache[sync_clock] >= thr->last_acquire_epoch) {
-// // The thread did no acquire operations since last release on this clock.
-// // So update only the thread's slot (other slots can't possibly change).
-// sync_clock[tid].clock = thr->epoch;
-// if (sync_clock.dirty_count == sync_clock.cnt
-// || (sync_clock.dirty_count == sync_clock.cnt - 1
-// && sync_clock[tid].dirty == false))
-// // All dirty flags are set, bail out.
-// return;
-// set all dirty bits, but preserve the thread's bit. // O(N)
-// update sync_clock.dirty_count;
-// return;
+// SyncClock and ThreadClock implement vector clocks for sync variables
+// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
+// ThreadClock contains a fixed-size vector clock, sized for the maximum
+// number of threads. SyncClock contains a growable vector clock, sized for
+// the number of threads it currently needs to track.
+// Together they implement a very simple model of operations, namely:
+//
+// void ThreadClock::acquire(const SyncClock *src) {
+// for (int i = 0; i < kMaxThreads; i++)
+// clock[i] = max(clock[i], src->clock[i]);
+// }
+//
+// void ThreadClock::release(SyncClock *dst) const {
+// for (int i = 0; i < kMaxThreads; i++)
+// dst->clock[i] = max(dst->clock[i], clock[i]);
+// }
+//
+// void ThreadClock::ReleaseStore(SyncClock *dst) const {
+// for (int i = 0; i < kMaxThreads; i++)
+// dst->clock[i] = clock[i];
+// }
+//
+// void ThreadClock::acq_rel(SyncClock *dst) {
+// acquire(dst);
+// release(dst);
// }
-// release_impl(thr_clock, sync_clock); // As usual, O(N).
-// set all dirty bits, but preserve the thread's bit.
-// // The previous step is combined with release_impl(), so that
-// // we scan the arrays only once.
-// update sync_clock.dirty_count;
-// }
+//
+// Conformance to this model is extensively verified in tsan_clock_test.cc.
+// However, the implementation is significantly more complex. The complexity
+// makes it possible to implement important classes of use cases in O(1)
+// instead of O(N).
+//
+// The use cases are:
+// 1. Singleton/once atomic that has a single release-store operation followed
+// by zillions of acquire-loads (the acquire-load is O(1)).
+// 2. Thread-local mutex (both lock and unlock can be O(1)).
+// 3. Leaf mutex (unlock is O(1)).
+// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
+// 5. An atomic with a single writer (writes can be O(1)).
+// The implementation dynamically adapts to the workload. So if an atomic is
+// in a read-only phase, these reads will be O(1); if it later switches to a
+// read/write phase, the implementation will correctly handle that by
+// switching to O(N). (A usage sketch of use case 1 appears after the end of
+// this diff.)
+//
+// Thread-safety note: all const operations on SyncClock's are conducted under
+// a shared lock; all non-const operations on SyncClock's are conducted under
+// an exclusive lock; ThreadClock's are private to respective threads and so
+// do not need any protection.
+//
+// Description of ThreadClock state:
+// clk_ - fixed-size vector clock.
+// nclk_ - effective size of the vector clock (the rest is zeros).
+// tid_ - index of the thread associated with the clock ("current thread").
+// last_acquire_ - current thread time when it last acquired something from
+//   other threads.
+//
+// Description of SyncClock state:
+// clk_ - variable-size vector clock; the low kClkBits bits hold the
+//   timestamp, the remaining bits hold the "acquired" flag (the actual
+//   value is the thread's reuse counter);
+//   if acquired == thr->reused_, then the respective thread has already
+//   acquired this clock (except possibly dirty_tids_).
+// dirty_tids_ - holds up to two indices in the vector clock that other
+//   threads need to acquire regardless of the "acquired" flag value;
+// release_store_tid_ - denotes that the clock state is the result of a
+//   release-store operation by the thread with index release_store_tid_;
+// release_store_reused_ - reuse count of release_store_tid_.
+
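
To make the reference model above concrete, here is a minimal standalone sketch of the naive O(N) semantics that the optimized implementation must preserve (tsan_clock_test.cc verifies conformance). The naive namespace, the NaiveThreadClock/NaiveSyncClock types, and the kMaxThreads bound are illustrative assumptions, not the real tsan types.

    // Sketch only: naive vector clocks matching the model in the header comment.
    #include <algorithm>
    #include <cstdint>

    namespace naive {

    const int kMaxThreads = 8;  // assumed small bound for the sketch

    struct NaiveSyncClock {
      uint64_t clock[kMaxThreads] = {};  // one timestamp per thread
    };

    struct NaiveThreadClock {
      uint64_t clock[kMaxThreads] = {};

      // acquire: pull in everything the sync variable has seen.
      void acquire(const NaiveSyncClock *src) {
        for (int i = 0; i < kMaxThreads; i++)
          clock[i] = std::max(clock[i], src->clock[i]);
      }

      // release: publish everything this thread has seen.
      void release(NaiveSyncClock *dst) const {
        for (int i = 0; i < kMaxThreads; i++)
          dst->clock[i] = std::max(dst->clock[i], clock[i]);
      }

      // release-store: overwrite the sync clock instead of merging into it.
      void ReleaseStore(NaiveSyncClock *dst) const {
        for (int i = 0; i < kMaxThreads; i++)
          dst->clock[i] = clock[i];
      }
    };

    }  // namespace naive
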
+// We don't have ThreadState in these methods, so this is an ugly hack that
+// works only in C++.
+#ifndef TSAN_GO
+# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
+#else
+# define CPP_STAT_INC(typ) (void)0
+#endif
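
The slot layout described above packs a timestamp and the reuse counter into a single 64-bit word. Below is an illustrative sketch of that packing, assuming a bit-field layout and kClkBits = 42; the struct name, the constant name, and the value are assumptions (the real kClkBits lives in tsan_defs.h).

    // Sketch only: one SyncClock slot, epoch in the low kClkBits bits and the
    // thread's reuse counter ("acquired" flag) in the remaining high bits.
    #include <cstdint>

    const int kClkBitsSketch = 42;  // assumed; see kClkBits in tsan_defs.h

    struct ClockElemSketch {
      uint64_t epoch  : kClkBitsSketch;       // thread timestamp
      uint64_t reused : 64 - kClkBitsSketch;  // == thr->reused_ => acquired
    };

    // The reuse counter must fit in the high bits; this mirrors the
    // CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits) in the
    // ThreadClock constructor below.
    static_assert(sizeof(ClockElemSketch) == sizeof(uint64_t),
                  "a slot packs into a single 64-bit word");
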
namespace __tsan {
-ThreadClock::ThreadClock() {
- nclk_ = 0;
- for (uptr i = 0; i < (uptr)kMaxTidInClock; i++)
- clk_[i] = 0;
+const unsigned kInvalidTid = (unsigned)-1;
+
+ThreadClock::ThreadClock(unsigned tid, unsigned reused)
+ : tid_(tid)
+ , reused_(reused + 1) { // 0 has special meaning
+ CHECK_LT(tid, kMaxTidInClock);
+ CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
+ nclk_ = tid_ + 1;
+ last_acquire_ = 0;
+ internal_memset(clk_, 0, sizeof(clk_));
+ clk_[tid_].reused = reused_;
}
void ThreadClock::acquire(const SyncClock *src) {
DCHECK(nclk_ <= kMaxTid);
DCHECK(src->clk_.Size() <= kMaxTid);
+ CPP_STAT_INC(StatClockAcquire);
+ // Check if it's empty -> no need to do anything.
const uptr nclk = src->clk_.Size();
- if (nclk == 0)
+ if (nclk == 0) {
+ CPP_STAT_INC(StatClockAcquireEmpty);
return;
+ }
+
+ // Check if we've already acquired src after the last release operation on src
+ bool acquired = false;
+ if (nclk > tid_) {
+ CPP_STAT_INC(StatClockAcquireLarge);
+ if (src->clk_[tid_].reused == reused_) {
+ CPP_STAT_INC(StatClockAcquireRepeat);
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ unsigned tid = src->dirty_tids_[i];
+ if (tid != kInvalidTid) {
+ u64 epoch = src->clk_[tid].epoch;
+ if (clk_[tid].epoch < epoch) {
+ clk_[tid].epoch = epoch;
+ acquired = true;
+ }
+ }
+ }
+ if (acquired) {
+ CPP_STAT_INC(StatClockAcquiredSomething);
+ last_acquire_ = clk_[tid_].epoch;
+ }
+ return;
+ }
+ }
+
+ // O(N) acquire.
+ CPP_STAT_INC(StatClockAcquireFull);
nclk_ = max(nclk_, nclk);
for (uptr i = 0; i < nclk; i++) {
- if (clk_[i] < src->clk_[i])
- clk_[i] = src->clk_[i];
+ u64 epoch = src->clk_[i].epoch;
+ if (clk_[i].epoch < epoch) {
+ clk_[i].epoch = epoch;
+ acquired = true;
+ }
+ }
+
+ // Remember that this thread has acquired this clock.
+ if (nclk > tid_)
+ src->clk_[tid_].reused = reused_;
+
+ if (acquired) {
+ CPP_STAT_INC(StatClockAcquiredSomething);
+ last_acquire_ = clk_[tid_].epoch;
}
}
void ThreadClock::release(SyncClock *dst) const {
- DCHECK(nclk_ <= kMaxTid);
- DCHECK(dst->clk_.Size() <= kMaxTid);
+ DCHECK_LE(nclk_, kMaxTid);
+ DCHECK_LE(dst->clk_.Size(), kMaxTid);
- if (dst->clk_.Size() < nclk_)
+ if (dst->clk_.Size() == 0) {
+ // ReleaseStore will correctly set release_store_tid_,
+ // which can be important for future operations.
+ ReleaseStore(dst);
+ return;
+ }
+
+ CPP_STAT_INC(StatClockRelease);
+ // Check if we need to resize dst.
+ if (dst->clk_.Size() < nclk_) {
+ CPP_STAT_INC(StatClockReleaseResize);
dst->clk_.Resize(nclk_);
+ }
+
+  // Check if we have not acquired anything from other threads
+  // since the last release on dst. If so, we need to update
+  // only dst->clk_[tid_].
+ if (dst->clk_[tid_].epoch > last_acquire_) {
+ UpdateCurrentThread(dst);
+ if (dst->release_store_tid_ != tid_ ||
+ dst->release_store_reused_ != reused_)
+ dst->release_store_tid_ = kInvalidTid;
+ return;
+ }
+
+ // O(N) release.
+ CPP_STAT_INC(StatClockReleaseFull);
+ // First, remember whether we've acquired dst.
+ bool acquired = IsAlreadyAcquired(dst);
+ if (acquired)
+ CPP_STAT_INC(StatClockReleaseAcquired);
+ // Update dst->clk_.
for (uptr i = 0; i < nclk_; i++) {
- if (dst->clk_[i] < clk_[i])
- dst->clk_[i] = clk_[i];
+ dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
+ dst->clk_[i].reused = 0;
}
+ // Clear 'acquired' flag in the remaining elements.
+ if (nclk_ < dst->clk_.Size())
+ CPP_STAT_INC(StatClockReleaseClearTail);
+ for (uptr i = nclk_; i < dst->clk_.Size(); i++)
+ dst->clk_[i].reused = 0;
+ for (unsigned i = 0; i < kDirtyTids; i++)
+ dst->dirty_tids_[i] = kInvalidTid;
+ dst->release_store_tid_ = kInvalidTid;
+ dst->release_store_reused_ = 0;
+ // If we've acquired dst, remember this fact,
+ // so that we don't need to acquire it on next acquire.
+ if (acquired)
+ dst->clk_[tid_].reused = reused_;
}
void ThreadClock::ReleaseStore(SyncClock *dst) const {
DCHECK(nclk_ <= kMaxTid);
DCHECK(dst->clk_.Size() <= kMaxTid);
+ CPP_STAT_INC(StatClockStore);
- if (dst->clk_.Size() < nclk_)
+ // Check if we need to resize dst.
+ if (dst->clk_.Size() < nclk_) {
+ CPP_STAT_INC(StatClockStoreResize);
dst->clk_.Resize(nclk_);
- for (uptr i = 0; i < nclk_; i++)
- dst->clk_[i] = clk_[i];
- for (uptr i = nclk_; i < dst->clk_.Size(); i++)
- dst->clk_[i] = 0;
+ }
+
+ if (dst->release_store_tid_ == tid_ &&
+ dst->release_store_reused_ == reused_ &&
+ dst->clk_[tid_].epoch > last_acquire_) {
+ CPP_STAT_INC(StatClockStoreFast);
+ UpdateCurrentThread(dst);
+ return;
+ }
+
+ // O(N) release-store.
+ CPP_STAT_INC(StatClockStoreFull);
+ for (uptr i = 0; i < nclk_; i++) {
+ dst->clk_[i].epoch = clk_[i].epoch;
+ dst->clk_[i].reused = 0;
+ }
+ // Clear the tail of dst->clk_.
+ if (nclk_ < dst->clk_.Size()) {
+ internal_memset(&dst->clk_[nclk_], 0,
+ (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
+ CPP_STAT_INC(StatClockStoreTail);
+ }
+ for (unsigned i = 0; i < kDirtyTids; i++)
+ dst->dirty_tids_[i] = kInvalidTid;
+ dst->release_store_tid_ = tid_;
+ dst->release_store_reused_ = reused_;
+  // Remember that we don't need to acquire it in the future.
+ dst->clk_[tid_].reused = reused_;
}
void ThreadClock::acq_rel(SyncClock *dst) {
+ CPP_STAT_INC(StatClockAcquireRelease);
acquire(dst);
- release(dst);
+ ReleaseStore(dst);
+}
+
+// Updates only the single element related to the current thread in dst->clk_.
+void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
+  // Update the thread's time, but preserve the 'acquired' flag.
+ dst->clk_[tid_].epoch = clk_[tid_].epoch;
+
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ if (dst->dirty_tids_[i] == tid_) {
+ CPP_STAT_INC(StatClockReleaseFast1);
+ return;
+ }
+ if (dst->dirty_tids_[i] == kInvalidTid) {
+ CPP_STAT_INC(StatClockReleaseFast2);
+ dst->dirty_tids_[i] = tid_;
+ return;
+ }
+ }
+ // Reset all 'acquired' flags, O(N).
+ CPP_STAT_INC(StatClockReleaseSlow);
+ for (uptr i = 0; i < dst->clk_.Size(); i++) {
+ dst->clk_[i].reused = 0;
+ }
+ for (unsigned i = 0; i < kDirtyTids; i++)
+ dst->dirty_tids_[i] = kInvalidTid;
+}
+
+// Checks whether the current thread has already acquired src.
+bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
+ if (src->clk_[tid_].reused != reused_)
+ return false;
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ unsigned tid = src->dirty_tids_[i];
+ if (tid != kInvalidTid) {
+ if (clk_[tid].epoch < src->clk_[tid].epoch)
+ return false;
+ }
+ }
+ return true;
+}
+
+// Sets a single element in the vector clock.
+// This function is called only from weird places like AcquireGlobal.
+void ThreadClock::set(unsigned tid, u64 v) {
+ DCHECK_LT(tid, kMaxTid);
+ DCHECK_GE(v, clk_[tid].epoch);
+ clk_[tid].epoch = v;
+ if (nclk_ <= tid)
+ nclk_ = tid + 1;
+ last_acquire_ = clk_[tid_].epoch;
+}
+
+void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
+ printf("clock=[");
+ for (uptr i = 0; i < nclk_; i++)
+ printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
+ printf("] reused=[");
+ for (uptr i = 0; i < nclk_; i++)
+ printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
+ printf("] tid=%u/%u last_acq=%llu",
+ tid_, reused_, last_acquire_);
}
SyncClock::SyncClock()
- : clk_(MBlockClock) {
+ : clk_(MBlockClock) {
+ release_store_tid_ = kInvalidTid;
+ release_store_reused_ = 0;
+ for (uptr i = 0; i < kDirtyTids; i++)
+ dirty_tids_[i] = kInvalidTid;
+}
+
+void SyncClock::Reset() {
+ clk_.Reset();
+ release_store_tid_ = kInvalidTid;
+ release_store_reused_ = 0;
+ for (uptr i = 0; i < kDirtyTids; i++)
+ dirty_tids_[i] = kInvalidTid;
+}
+
+void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
+ printf("clock=[");
+ for (uptr i = 0; i < clk_.Size(); i++)
+ printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
+ printf("] reused=[");
+ for (uptr i = 0; i < clk_.Size(); i++)
+ printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
+ printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
+ release_store_tid_, release_store_reused_,
+ dirty_tids_[0], dirty_tids_[1]);
}
} // namespace __tsan
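
As a usage illustration of use case 1 from the header comment (a singleton with one release-store followed by many O(1) acquire-loads), here is a hedged sketch in the style of tsan_clock_test.cc. SingletonSketch and the tid/epoch values are arbitrary assumptions; only ThreadClock, SyncClock, set, acquire, and ReleaseStore come from the file above.

    // Sketch only: drives the singleton fast path described in the header comment.
    #include "tsan_clock.h"

    void SingletonSketch() {
      __tsan::SyncClock sync;  // the singleton atomic's clock

      // The single writer (tid 1) performs the one release-store.
      __tsan::ThreadClock writer(/*tid=*/1, /*reused=*/0);
      writer.set(1, 10);           // advance the writer's own time
      writer.ReleaseStore(&sync);  // O(N); records release_store_tid_ = 1

      // A reader (tid 0) then performs many acquire-loads.
      __tsan::ThreadClock reader(/*tid=*/0, /*reused=*/0);
      reader.acquire(&sync);  // first acquire: O(N) full merge, then marks the
                              // reader's slot as acquired (reused flag)
      reader.acquire(&sync);  // repeated acquire: O(1), since the reused flag
                              // matches and dirty_tids_ is empty
    }
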