qsp: QEMU's Synchronization Profiler

The goal of this module is to profile synchronization primitives (i.e. mutexes, recursive mutexes and condition variables) so that scalability issues can be quickly diagnosed. Sync primitives are profiled by QSP based on the vaddr of the object accessed as well as the call site (file:line_nr). That means the same object called from two different call sites will be tracked in separate entries, which might be reported together or separately (see subsequent commit on call site coalescing). Some perf numbers: Host: Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz Command: taskset -c 0 tests/atomic_add-bench -d 5 -m - Before: 54.80 Mops/s - After: 54.75 Mops/s That is, a negligible slowdown due to the now indirect call to qemu_mutex_lock. Note that using a branch instead of an indirect call introduces a more severe slowdown (53.65 Mops/s, i.e. 2% slowdown). Enabling the profiler (with -p, added in this series) is more interesting: - No profiling: 54.75 Mops/s - W/ profiling: 12.53 Mops/s That is, a 4.36X slowdown. We can break down this slowdown by removing the get_clock calls or the entry lookup: - No profiling: 54.75 Mops/s - W/o get_clock: 25.37 Mops/s - W/o entry lookup: 19.30 Mops/s - W/ profiling: 12.53 Mops/s Signed-off-by: Emilio G. Cota <cota@braap.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Emilio G. Cota <cota@braap.org> 2017-08-08 13:53:15 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2018-08-23 18:46:25 +0200
commit: fe9959a275fc7b4744e49201a390627b3adda597 (patch)
tree: b6d4d8e7b58866e9fa1ffd3a76ec1a1581065cc0 /util/qht.c
parent: c04649eeeaf5f84ba9e43d0c4ffbe1719b0d940c (diff)
1 files changed, 37 insertions, 10 deletions
diff --git a/util/qht.c b/util/qht.c
index c138777a9c..1e3a072e25 100644
--- a/util/qht.c
+++ b/util/qht.c
@@ -90,6 +90,33 @@
 #endif
 
 /*
+ * Do _not_ use qemu_mutex_[try]lock directly! Use these macros, otherwise
+ * the profiler (QSP) will deadlock.
+ */
+static inline void qht_lock(struct qht *ht)
+{
+    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
+        qemu_mutex_lock__raw(&ht->lock);
+    } else {
+        qemu_mutex_lock(&ht->lock);
+    }
+}
+
+static inline int qht_trylock(struct qht *ht)
+{
+    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
+        return qemu_mutex_trylock__raw(&(ht)->lock);
+    }
+    return qemu_mutex_trylock(&(ht)->lock);
+}
+
+/* this inline is not really necessary, but it helps keep code consistent */
+static inline void qht_unlock(struct qht *ht)
+{
+    qemu_mutex_unlock(&ht->lock);
+}
+
+/*
  * Note: reading partially-updated pointers in @pointers could lead to
  * segfaults. We thus access them with atomic_read/set; this guarantees
  * that the compiler makes all those accesses atomic. We also need the
@@ -254,10 +281,10 @@ void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
     qht_map_unlock_buckets(map);
 
     /* we raced with a resize; acquire ht->lock to see the updated ht->map */
-    qemu_mutex_lock(&ht->lock);
+    qht_lock(ht);
     map = ht->map;
     qht_map_lock_buckets(map);
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
     *pmap = map;
     return;
 }
@@ -288,11 +315,11 @@ struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
     qemu_spin_unlock(&b->lock);
 
     /* we raced with a resize; acquire ht->lock to see the updated ht->map */
-    qemu_mutex_lock(&ht->lock);
+    qht_lock(ht);
     map = ht->map;
     b = qht_map_to_bucket(map, hash);
     qemu_spin_lock(&b->lock);
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
     *pmap = map;
     return b;
 }
@@ -430,13 +457,13 @@ bool qht_reset_size(struct qht *ht, size_t n_elems)
 
     n_buckets = qht_elems_to_buckets(n_elems);
 
-    qemu_mutex_lock(&ht->lock);
+    qht_lock(ht);
     map = ht->map;
     if (n_buckets != map->n_buckets) {
         new = qht_map_create(n_buckets);
     }
     qht_do_resize_and_reset(ht, new);
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
 
     return !!new;
 }
@@ -565,7 +592,7 @@ static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
      * If the lock is taken it probably means there's an ongoing resize,
      * so bail out.
      */
-    if (qemu_mutex_trylock(&ht->lock)) {
+    if (qht_trylock(ht)) {
         return;
     }
     map = ht->map;
@@ -575,7 +602,7 @@ static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
 
         qht_do_resize(ht, new);
     }
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
 }
 
 bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
@@ -788,7 +815,7 @@ bool qht_resize(struct qht *ht, size_t n_elems)
     size_t n_buckets = qht_elems_to_buckets(n_elems);
     size_t ret = false;
 
-    qemu_mutex_lock(&ht->lock);
+    qht_lock(ht);
     if (n_buckets != ht->map->n_buckets) {
         struct qht_map *new;
 
@@ -796,7 +823,7 @@ bool qht_resize(struct qht *ht, size_t n_elems)
         qht_do_resize(ht, new);
         ret = true;
     }
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
 
     return ret;
 }
author	Emilio G. Cota <cota@braap.org>	2017-08-08 13:53:15 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2018-08-23 18:46:25 +0200
commit	fe9959a275fc7b4744e49201a390627b3adda597 (patch)
tree	b6d4d8e7b58866e9fa1ffd3a76ec1a1581065cc0 /util/qht.c
parent	c04649eeeaf5f84ba9e43d0c4ffbe1719b0d940c (diff)