xref: /openbmc/qemu/util/qsp.c (revision 0cb113ad1aa94d09dce97c32eb1347016c18aa64)
1  /*
2   * qsp.c - QEMU Synchronization Profiler
3   *
4   * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
5   *
6   * License: GNU GPL, version 2 or later.
7   *   See the COPYING file in the top-level directory.
8   *
9   * QSP profiles the time spent in synchronization primitives, which can
10   * help diagnose performance problems, e.g. scalability issues when
11   * contention is high.
12   *
13   * The primitives currently supported are mutexes, recursive mutexes and
14   * condition variables. Note that not all related functions are intercepted;
15   * instead we profile only those functions that can have a performance impact,
16   * either due to blocking (e.g. cond_wait, mutex_lock) or cache line
17   * contention (e.g. mutex_lock, mutex_trylock).
18   *
19   * QSP's design focuses on speed and scalability. This is achieved
20   * by having threads do their profiling entirely on thread-local data.
21   * The appropriate thread-local data is found via a QHT, i.e. a concurrent hash
22   * table. To aggregate data in order to generate a report, we iterate over
23   * all entries in the hash table. Depending on the number of threads and
24   * synchronization objects this might be expensive, but note that it is
25   * very rarely called -- reports are generated only when requested by users.
26   *
27   * Reports are generated as a table where each row represents a call site. A
28   * call site is the triplet formed by the __file__ and __LINE__ of the caller
29   * as well as the address of the "object" (i.e. mutex, rec. mutex or condvar)
30   * being operated on. Optionally, call sites that operate on different objects
31   * of the same type can be coalesced, which can be particularly useful when
32   * profiling dynamically-allocated objects.
33   *
34   * Alternative designs considered:
35   *
36   * - Use an off-the-shelf profiler such as mutrace. This is not a viable option
37   *   for us because QEMU has __malloc_hook set (by one of the libraries it
38   *   uses); leaving this hook unset is required to avoid deadlock in mutrace.
39   *
40   * - Use a glib HT for each thread, protecting each HT with its own lock.
41   *   This isn't simpler than the current design, and is 10% slower in the
42   *   atomic_add-bench microbenchmark (-m option).
43   *
44   * - For reports, just use a binary tree as we aggregate data, instead of having
45   *   an intermediate hash table. This would simplify the code only slightly, but
46   *   would perform badly if there were many threads and objects to track.
47   *
48   * - Wrap operations on qsp entries with RCU read-side critical sections, so
49   *   that qsp_reset() can delete entries. Unfortunately, the overhead of calling
50   *   rcu_read_lock/unlock slows down atomic_add-bench -m by 24%. Having
51   *   a snapshot that is updated on qsp_reset() avoids this overhead.
52   *
53   * Related Work:
54   * - Lennart Poettering's mutrace: http://0pointer.de/blog/projects/mutrace.html
55   * - Lozi, David, Thomas, Lawall and Muller. "Remote Core Locking: Migrating
56   *   Critical-Section Execution to Improve the Performance of Multithreaded
57   *   Applications", USENIX ATC'12.
58   */
59  
60  #include "qemu/osdep.h"
61  #include "qemu/qemu-print.h"
62  #include "qemu/thread.h"
63  #include "qemu/timer.h"
64  #include "qemu/qht.h"
65  #include "qemu/rcu.h"
66  #include "qemu/xxhash.h"
67  
68  enum QSPType {
69      QSP_MUTEX,
70      QSP_BQL_MUTEX,
71      QSP_REC_MUTEX,
72      QSP_CONDVAR,
73  };
74  
75  struct QSPCallSite {
76      const void *obj;
77      const char *file; /* i.e. __FILE__; shortened later */
78      int line;
79      enum QSPType type;
80  };
81  typedef struct QSPCallSite QSPCallSite;
82  
83  struct QSPEntry {
84      void *thread_ptr;
85      const QSPCallSite *callsite;
86      uint64_t n_acqs;
87      uint64_t ns;
88      unsigned int n_objs; /* count of coalesced objs; only used for reporting */
89  };
90  typedef struct QSPEntry QSPEntry;
91  
92  struct QSPSnapshot {
93      struct rcu_head rcu;
94      struct qht ht;
95  };
96  typedef struct QSPSnapshot QSPSnapshot;
97  
98  /* initial sizing for hash tables */
99  #define QSP_INITIAL_SIZE 64
100  
101  /* If this file is moved, QSP_REL_PATH should be updated accordingly */
102  #define QSP_REL_PATH "util/qsp.c"
103  
104  /* this file's full path. Used to present all call sites with relative paths */
105  static size_t qsp_qemu_path_len;
106  
107  /* the address of qsp_thread gives us a unique 'thread ID' */
108  static __thread int qsp_thread;
109  
110  /*
111   * Call sites are the same for all threads, so we track them in a separate hash
112   * table to save memory.
113   */
114  static struct qht qsp_callsite_ht;
115  
116  static struct qht qsp_ht;
117  static QSPSnapshot *qsp_snapshot;
118  static bool qsp_initialized, qsp_initializing;
119  
120  static const char * const qsp_typenames[] = {
121      [QSP_MUTEX]     = "mutex",
122      [QSP_BQL_MUTEX] = "BQL mutex",
123      [QSP_REC_MUTEX] = "rec_mutex",
124      [QSP_CONDVAR]   = "condvar",
125  };
126  
127  QemuMutexLockFunc qemu_bql_mutex_lock_func = qemu_mutex_lock_impl;
128  QemuMutexLockFunc qemu_mutex_lock_func = qemu_mutex_lock_impl;
129  QemuMutexTrylockFunc qemu_mutex_trylock_func = qemu_mutex_trylock_impl;
130  QemuRecMutexLockFunc qemu_rec_mutex_lock_func = qemu_rec_mutex_lock_impl;
131  QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func =
132      qemu_rec_mutex_trylock_impl;
133  QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl;
134  QemuCondTimedWaitFunc qemu_cond_timedwait_func = qemu_cond_timedwait_impl;
135  
136  /*
137   * It pays off to _not_ hash callsite->file; hashing a string is slow, and
138   * without it we still get a pretty unique hash.
139   */
140  static inline
141  uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab)
142  {
143      uint64_t cd = (uint64_t)(uintptr_t)callsite->obj;
144      uint32_t e = callsite->line;
145      uint32_t f = callsite->type;
146  
147      return qemu_xxhash6(ab, cd, e, f);
148  }
149  
150  static inline
151  uint32_t qsp_callsite_hash(const QSPCallSite *callsite)
152  {
153      return do_qsp_callsite_hash(callsite, 0);
154  }
155  
156  static inline uint32_t do_qsp_entry_hash(const QSPEntry *entry, uint64_t a)
157  {
158      return do_qsp_callsite_hash(entry->callsite, a);
159  }
160  
161  static uint32_t qsp_entry_hash(const QSPEntry *entry)
162  {
163      return do_qsp_entry_hash(entry, (uint64_t)(uintptr_t)entry->thread_ptr);
164  }
165  
166  static uint32_t qsp_entry_no_thread_hash(const QSPEntry *entry)
167  {
168      return do_qsp_entry_hash(entry, 0);
169  }
170  
171  /* without the objects we need to hash the file name to get a decent hash */
172  static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry)
173  {
174      const QSPCallSite *callsite = entry->callsite;
175      uint64_t ab = g_str_hash(callsite->file);
176      uint64_t cd = callsite->line;
177      uint32_t e = callsite->type;
178  
179      return qemu_xxhash5(ab, cd, e);
180  }
181  
182  static bool qsp_callsite_cmp(const void *ap, const void *bp)
183  {
184      const QSPCallSite *a = ap;
185      const QSPCallSite *b = bp;
186  
187      return a == b ||
188          (a->obj == b->obj &&
189           a->line == b->line &&
190           a->type == b->type &&
191           (a->file == b->file || !strcmp(a->file, b->file)));
192  }
193  
194  static bool qsp_callsite_no_obj_cmp(const void *ap, const void *bp)
195  {
196      const QSPCallSite *a = ap;
197      const QSPCallSite *b = bp;
198  
199      return a == b ||
200          (a->line == b->line &&
201           a->type == b->type &&
202           (a->file == b->file || !strcmp(a->file, b->file)));
203  }
204  
205  static bool qsp_entry_no_thread_cmp(const void *ap, const void *bp)
206  {
207      const QSPEntry *a = ap;
208      const QSPEntry *b = bp;
209  
210      return qsp_callsite_cmp(a->callsite, b->callsite);
211  }
212  
213  static bool qsp_entry_no_thread_obj_cmp(const void *ap, const void *bp)
214  {
215      const QSPEntry *a = ap;
216      const QSPEntry *b = bp;
217  
218      return qsp_callsite_no_obj_cmp(a->callsite, b->callsite);
219  }
220  
221  static bool qsp_entry_cmp(const void *ap, const void *bp)
222  {
223      const QSPEntry *a = ap;
224      const QSPEntry *b = bp;
225  
226      return a->thread_ptr == b->thread_ptr &&
227          qsp_callsite_cmp(a->callsite, b->callsite);
228  }
229  
230  /*
231   * Normally we'd call this from a constructor function, but we want it to work
232   * via libutil as well.
233   */
234  static void qsp_do_init(void)
235  {
236      /* make sure this file's path in the tree is up to date with QSP_REL_PATH */
237      g_assert(strstr(__FILE__, QSP_REL_PATH));
238      qsp_qemu_path_len = strlen(__FILE__) - strlen(QSP_REL_PATH);
239  
240      qht_init(&qsp_ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
241               QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
242      qht_init(&qsp_callsite_ht, qsp_callsite_cmp, QSP_INITIAL_SIZE,
243               QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
244  }
245  
246  static __attribute__((noinline)) void qsp_init__slowpath(void)
247  {
248      if (qatomic_cmpxchg(&qsp_initializing, false, true) == false) {
249          qsp_do_init();
250          qatomic_set(&qsp_initialized, true);
251      } else {
252          while (!qatomic_read(&qsp_initialized)) {
253              cpu_relax();
254          }
255      }
256  }
257  
258  /* qsp_init() must be called from _all_ exported functions */
259  static inline void qsp_init(void)
260  {
261      if (likely(qatomic_read(&qsp_initialized))) {
262          return;
263      }
264      qsp_init__slowpath();
265  }
266  
267  static QSPCallSite *qsp_callsite_find(const QSPCallSite *orig)
268  {
269      QSPCallSite *callsite;
270      uint32_t hash;
271  
272      hash = qsp_callsite_hash(orig);
273      callsite = qht_lookup(&qsp_callsite_ht, orig, hash);
274      if (callsite == NULL) {
275          void *existing = NULL;
276  
277          callsite = g_new(QSPCallSite, 1);
278          memcpy(callsite, orig, sizeof(*callsite));
279          qht_insert(&qsp_callsite_ht, callsite, hash, &existing);
280          if (unlikely(existing)) {
281              g_free(callsite);
282              callsite = existing;
283          }
284      }
285      return callsite;
286  }
287  
288  static QSPEntry *
289  qsp_entry_create(struct qht *ht, const QSPEntry *entry, uint32_t hash)
290  {
291      QSPEntry *e;
292      void *existing = NULL;
293  
294      e = g_new0(QSPEntry, 1);
295      e->thread_ptr = entry->thread_ptr;
296      e->callsite = qsp_callsite_find(entry->callsite);
297  
298      qht_insert(ht, e, hash, &existing);
299      if (unlikely(existing)) {
300          g_free(e);
301          e = existing;
302      }
303      return e;
304  }
305  
306  static QSPEntry *
307  qsp_entry_find(struct qht *ht, const QSPEntry *entry, uint32_t hash)
308  {
309      QSPEntry *e;
310  
311      e = qht_lookup(ht, entry, hash);
312      if (e == NULL) {
313          e = qsp_entry_create(ht, entry, hash);
314      }
315      return e;
316  }
317  
318  /*
319   * Note: Entries are never removed, so callers do not have to be in an RCU
320   * read-side critical section.
321   */
322  static QSPEntry *qsp_entry_get(const void *obj, const char *file, int line,
323                                 enum QSPType type)
324  {
325      QSPCallSite callsite = {
326          .obj = obj,
327          .file = file,
328          .line = line,
329          .type = type,
330      };
331      QSPEntry orig;
332      uint32_t hash;
333  
334      qsp_init();
335  
336      orig.thread_ptr = &qsp_thread;
337      orig.callsite = &callsite;
338  
339      hash = qsp_entry_hash(&orig);
340      return qsp_entry_find(&qsp_ht, &orig, hash);
341  }
342  
343  /*
344   * @e is in the global hash table; it is only written to by the current thread,
345   * so we write to it atomically (as in "write once") to prevent torn reads.
346   */
347  static inline void do_qsp_entry_record(QSPEntry *e, int64_t delta, bool acq)
348  {
349      qatomic_set_u64(&e->ns, e->ns + delta);
350      if (acq) {
351          qatomic_set_u64(&e->n_acqs, e->n_acqs + 1);
352      }
353  }
354  
355  static inline void qsp_entry_record(QSPEntry *e, int64_t delta)
356  {
357      do_qsp_entry_record(e, delta, true);
358  }
359  
360  #define QSP_GEN_VOID(type_, qsp_t_, func_, impl_)                       \
361      static void func_(type_ *obj, const char *file, int line)           \
362      {                                                                   \
363          QSPEntry *e;                                                    \
364          int64_t t0, t1;                                                 \
365                                                                          \
366          t0 = get_clock();                                               \
367          impl_(obj, file, line);                                         \
368          t1 = get_clock();                                               \
369                                                                          \
370          e = qsp_entry_get(obj, file, line, qsp_t_);                     \
371          qsp_entry_record(e, t1 - t0);                                   \
372      }
373  
374  #define QSP_GEN_RET1(type_, qsp_t_, func_, impl_)                       \
375      static int func_(type_ *obj, const char *file, int line)            \
376      {                                                                   \
377          QSPEntry *e;                                                    \
378          int64_t t0, t1;                                                 \
379          int err;                                                        \
380                                                                          \
381          t0 = get_clock();                                               \
382          err = impl_(obj, file, line);                                   \
383          t1 = get_clock();                                               \
384                                                                          \
385          e = qsp_entry_get(obj, file, line, qsp_t_);                     \
386          do_qsp_entry_record(e, t1 - t0, !err);                          \
387          return err;                                                     \
388      }
389  
390  QSP_GEN_VOID(QemuMutex, QSP_BQL_MUTEX, qsp_bql_mutex_lock, qemu_mutex_lock_impl)
391  QSP_GEN_VOID(QemuMutex, QSP_MUTEX, qsp_mutex_lock, qemu_mutex_lock_impl)
392  QSP_GEN_RET1(QemuMutex, QSP_MUTEX, qsp_mutex_trylock, qemu_mutex_trylock_impl)
393  
394  QSP_GEN_VOID(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_lock,
395               qemu_rec_mutex_lock_impl)
396  QSP_GEN_RET1(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_trylock,
397               qemu_rec_mutex_trylock_impl)
398  
399  #undef QSP_GEN_RET1
400  #undef QSP_GEN_VOID
401  
402  static void
403  qsp_cond_wait(QemuCond *cond, QemuMutex *mutex, const char *file, int line)
404  {
405      QSPEntry *e;
406      int64_t t0, t1;
407  
408      t0 = get_clock();
409      qemu_cond_wait_impl(cond, mutex, file, line);
410      t1 = get_clock();
411  
412      e = qsp_entry_get(cond, file, line, QSP_CONDVAR);
413      qsp_entry_record(e, t1 - t0);
414  }
415  
416  static bool
417  qsp_cond_timedwait(QemuCond *cond, QemuMutex *mutex, int ms,
418                     const char *file, int line)
419  {
420      QSPEntry *e;
421      int64_t t0, t1;
422      bool ret;
423  
424      t0 = get_clock();
425      ret = qemu_cond_timedwait_impl(cond, mutex, ms, file, line);
426      t1 = get_clock();
427  
428      e = qsp_entry_get(cond, file, line, QSP_CONDVAR);
429      qsp_entry_record(e, t1 - t0);
430      return ret;
431  }
432  
433  bool qsp_is_enabled(void)
434  {
435      return qatomic_read(&qemu_mutex_lock_func) == qsp_mutex_lock;
436  }
437  
438  void qsp_enable(void)
439  {
440      qatomic_set(&qemu_mutex_lock_func, qsp_mutex_lock);
441      qatomic_set(&qemu_mutex_trylock_func, qsp_mutex_trylock);
442      qatomic_set(&qemu_bql_mutex_lock_func, qsp_bql_mutex_lock);
443      qatomic_set(&qemu_rec_mutex_lock_func, qsp_rec_mutex_lock);
444      qatomic_set(&qemu_rec_mutex_trylock_func, qsp_rec_mutex_trylock);
445      qatomic_set(&qemu_cond_wait_func, qsp_cond_wait);
446      qatomic_set(&qemu_cond_timedwait_func, qsp_cond_timedwait);
447  }
448  
449  void qsp_disable(void)
450  {
451      qatomic_set(&qemu_mutex_lock_func, qemu_mutex_lock_impl);
452      qatomic_set(&qemu_mutex_trylock_func, qemu_mutex_trylock_impl);
453      qatomic_set(&qemu_bql_mutex_lock_func, qemu_mutex_lock_impl);
454      qatomic_set(&qemu_rec_mutex_lock_func, qemu_rec_mutex_lock_impl);
455      qatomic_set(&qemu_rec_mutex_trylock_func, qemu_rec_mutex_trylock_impl);
456      qatomic_set(&qemu_cond_wait_func, qemu_cond_wait_impl);
457      qatomic_set(&qemu_cond_timedwait_func, qemu_cond_timedwait_impl);
458  }
459  
460  static gint qsp_tree_cmp(gconstpointer ap, gconstpointer bp, gpointer up)
461  {
462      const QSPEntry *a = ap;
463      const QSPEntry *b = bp;
464      enum QSPSortBy sort_by = *(enum QSPSortBy *)up;
465      const QSPCallSite *ca;
466      const QSPCallSite *cb;
467  
468      switch (sort_by) {
469      case QSP_SORT_BY_TOTAL_WAIT_TIME:
470          if (a->ns > b->ns) {
471              return -1;
472          } else if (a->ns < b->ns) {
473              return 1;
474          }
475          break;
476      case QSP_SORT_BY_AVG_WAIT_TIME:
477      {
478          double avg_a = a->n_acqs ? a->ns / a->n_acqs : 0;
479          double avg_b = b->n_acqs ? b->ns / b->n_acqs : 0;
480  
481          if (avg_a > avg_b) {
482              return -1;
483          } else if (avg_a < avg_b) {
484              return 1;
485          }
486          break;
487      }
488      default:
489          g_assert_not_reached();
490      }
491  
492      ca = a->callsite;
493      cb = b->callsite;
494      /* Break the tie with the object's address */
495      if (ca->obj < cb->obj) {
496          return -1;
497      } else if (ca->obj > cb->obj) {
498          return 1;
499      } else {
500          int cmp;
501  
502          /* same obj. Break the tie with the callsite's file */
503          cmp = strcmp(ca->file, cb->file);
504          if (cmp) {
505              return cmp;
506          }
507          /* same callsite file. Break the tie with the callsite's line */
508          g_assert(ca->line != cb->line);
509          if (ca->line < cb->line) {
510              return -1;
511          } else if (ca->line > cb->line) {
512              return 1;
513          } else {
514              /* break the tie with the callsite's type */
515              return cb->type - ca->type;
516          }
517      }
518  }
519  
520  static void qsp_sort(void *p, uint32_t h, void *userp)
521  {
522      QSPEntry *e = p;
523      GTree *tree = userp;
524  
525      g_tree_insert(tree, e, NULL);
526  }
527  
528  static void qsp_aggregate(void *p, uint32_t h, void *up)
529  {
530      struct qht *ht = up;
531      const QSPEntry *e = p;
532      QSPEntry *agg;
533      uint32_t hash;
534  
535      hash = qsp_entry_no_thread_hash(e);
536      agg = qsp_entry_find(ht, e, hash);
537      /*
538       * The entry is in the global hash table; read from it atomically (as in
539       * "read once").
540       */
541      agg->ns += qatomic_read_u64(&e->ns);
542      agg->n_acqs += qatomic_read_u64(&e->n_acqs);
543  }
544  
545  static void qsp_iter_diff(void *p, uint32_t hash, void *htp)
546  {
547      struct qht *ht = htp;
548      QSPEntry *old = p;
549      QSPEntry *new;
550  
551      new = qht_lookup(ht, old, hash);
552      /* entries are never deleted, so we must have this one */
553      g_assert(new != NULL);
554      /* our reading of the stats happened after the snapshot was taken */
555      g_assert(new->n_acqs >= old->n_acqs);
556      g_assert(new->ns >= old->ns);
557  
558      new->n_acqs -= old->n_acqs;
559      new->ns -= old->ns;
560  
561      /* No point in reporting an empty entry */
562      if (new->n_acqs == 0 && new->ns == 0) {
563          bool removed = qht_remove(ht, new, hash);
564  
565          g_assert(removed);
566          g_free(new);
567      }
568  }
569  
570  static void qsp_diff(struct qht *orig, struct qht *new)
571  {
572      qht_iter(orig, qsp_iter_diff, new);
573  }
574  
575  static void qsp_iter_callsite_coalesce(void *p, uint32_t h, void *htp)
576  {
577      struct qht *ht = htp;
578      QSPEntry *old = p;
579      QSPEntry *e;
580      uint32_t hash;
581  
582      hash = qsp_entry_no_thread_obj_hash(old);
583      e = qht_lookup(ht, old, hash);
584      if (e == NULL) {
585          e = qsp_entry_create(ht, old, hash);
586          e->n_objs = 1;
587      } else if (e->callsite->obj != old->callsite->obj) {
588          e->n_objs++;
589      }
590      e->ns += old->ns;
591      e->n_acqs += old->n_acqs;
592  }
593  
594  static void qsp_ht_delete(void *p, uint32_t h, void *htp)
595  {
596      g_free(p);
597  }
598  
599  static void qsp_mktree(GTree *tree, bool callsite_coalesce)
600  {
601      struct qht ht, coalesce_ht;
602      struct qht *htp;
603  
604      /*
605       * First, see if there's a prior snapshot, so that we read the global hash
606       * table _after_ the snapshot has been created, which guarantees that
607       * the entries we'll read will be a superset of the snapshot's entries.
608       *
609       * We must remain in an RCU read-side critical section until we're done
610       * with the snapshot.
611       */
612      WITH_RCU_READ_LOCK_GUARD() {
613          QSPSnapshot *snap = qatomic_rcu_read(&qsp_snapshot);
614  
615          /* Aggregate all results from the global hash table into a local one */
616          qht_init(&ht, qsp_entry_no_thread_cmp, QSP_INITIAL_SIZE,
617                   QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
618          qht_iter(&qsp_ht, qsp_aggregate, &ht);
619  
620          /* compute the difference wrt the snapshot, if any */
621          if (snap) {
622              qsp_diff(&snap->ht, &ht);
623          }
624      }
625  
626      htp = &ht;
627      if (callsite_coalesce) {
628          qht_init(&coalesce_ht, qsp_entry_no_thread_obj_cmp, QSP_INITIAL_SIZE,
629                   QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
630          qht_iter(&ht, qsp_iter_callsite_coalesce, &coalesce_ht);
631  
632          /* free the previous hash table, and point htp to coalesce_ht */
633          qht_iter(&ht, qsp_ht_delete, NULL);
634          qht_destroy(&ht);
635          htp = &coalesce_ht;
636      }
637  
638      /* sort the hash table elements by using a tree */
639      qht_iter(htp, qsp_sort, tree);
640  
641      /* free the hash table, but keep the elements (those are in the tree now) */
642      qht_destroy(htp);
643  }
644  
645  /* free string with g_free */
646  static char *qsp_at(const QSPCallSite *callsite)
647  {
648      GString *s = g_string_new(NULL);
649      const char *shortened;
650  
651      /* remove the absolute path to qemu */
652      if (unlikely(strlen(callsite->file) < qsp_qemu_path_len)) {
653          shortened = callsite->file;
654      } else {
655          shortened = callsite->file + qsp_qemu_path_len;
656      }
657      g_string_append_printf(s, "%s:%u", shortened, callsite->line);
658      return g_string_free(s, FALSE);
659  }
660  
661  struct QSPReportEntry {
662      const void *obj;
663      char *callsite_at;
664      const char *typename;
665      double time_s;
666      double ns_avg;
667      uint64_t n_acqs;
668      unsigned int n_objs;
669  };
670  typedef struct QSPReportEntry QSPReportEntry;
671  
672  struct QSPReport {
673      QSPReportEntry *entries;
674      size_t n_entries;
675      size_t max_n_entries;
676  };
677  typedef struct QSPReport QSPReport;
678  
679  static gboolean qsp_tree_report(gpointer key, gpointer value, gpointer udata)
680  {
681      const QSPEntry *e = key;
682      QSPReport *report = udata;
683      QSPReportEntry *entry;
684  
685      if (report->n_entries == report->max_n_entries) {
686          return TRUE;
687      }
688      entry = &report->entries[report->n_entries];
689      report->n_entries++;
690  
691      entry->obj = e->callsite->obj;
692      entry->n_objs = e->n_objs;
693      entry->callsite_at = qsp_at(e->callsite);
694      entry->typename = qsp_typenames[e->callsite->type];
695      entry->time_s = e->ns * 1e-9;
696      entry->n_acqs = e->n_acqs;
697      entry->ns_avg = e->n_acqs ? e->ns / e->n_acqs : 0;
698      return FALSE;
699  }
700  
701  static void pr_report(const QSPReport *rep)
702  {
703      char *dashes;
704      size_t max_len = 0;
705      int callsite_len = 0;
706      int callsite_rspace;
707      int n_dashes;
708      size_t i;
709  
710      /* find out the maximum length of all 'callsite' fields */
711      for (i = 0; i < rep->n_entries; i++) {
712          const QSPReportEntry *e = &rep->entries[i];
713          size_t len = strlen(e->callsite_at);
714  
715          if (len > max_len) {
716              max_len = len;
717          }
718      }
719  
720      callsite_len = MAX(max_len, strlen("Call site"));
721      /* white space to leave to the right of "Call site" */
722      callsite_rspace = callsite_len - strlen("Call site");
723  
724      qemu_printf("Type               Object  Call site%*s  Wait Time (s)  "
725                  "       Count  Average (us)\n", callsite_rspace, "");
726  
727      /* build a horizontal rule with dashes */
728      n_dashes = 79 + callsite_rspace;
729      dashes = g_malloc(n_dashes + 1);
730      memset(dashes, '-', n_dashes);
731      dashes[n_dashes] = '\0';
732      qemu_printf("%s\n", dashes);
733  
734      for (i = 0; i < rep->n_entries; i++) {
735          const QSPReportEntry *e = &rep->entries[i];
736          GString *s = g_string_new(NULL);
737  
738          g_string_append_printf(s, "%-9s  ", e->typename);
739          if (e->n_objs > 1) {
740              g_string_append_printf(s, "[%12u]", e->n_objs);
741          } else {
742              g_string_append_printf(s, "%14p", e->obj);
743          }
744          g_string_append_printf(s, "  %s%*s  %13.5f  %12" PRIu64 "  %12.2f\n",
745                                 e->callsite_at,
746                                 callsite_len - (int)strlen(e->callsite_at), "",
747                                 e->time_s, e->n_acqs, e->ns_avg * 1e-3);
748          qemu_printf("%s", s->str);
749          g_string_free(s, TRUE);
750      }
751  
752      qemu_printf("%s\n", dashes);
753      g_free(dashes);
754  }
755  
756  static void report_destroy(QSPReport *rep)
757  {
758      size_t i;
759  
760      for (i = 0; i < rep->n_entries; i++) {
761          QSPReportEntry *e = &rep->entries[i];
762  
763          g_free(e->callsite_at);
764      }
765      g_free(rep->entries);
766  }
767  
768  void qsp_report(size_t max, enum QSPSortBy sort_by,
769                  bool callsite_coalesce)
770  {
771      GTree *tree = g_tree_new_full(qsp_tree_cmp, &sort_by, g_free, NULL);
772      QSPReport rep;
773  
774      qsp_init();
775  
776      rep.entries = g_new0(QSPReportEntry, max);
777      rep.n_entries = 0;
778      rep.max_n_entries = max;
779  
780      qsp_mktree(tree, callsite_coalesce);
781      g_tree_foreach(tree, qsp_tree_report, &rep);
782      g_tree_destroy(tree);
783  
784      pr_report(&rep);
785      report_destroy(&rep);
786  }
787  
788  static void qsp_snapshot_destroy(QSPSnapshot *snap)
789  {
790      qht_iter(&snap->ht, qsp_ht_delete, NULL);
791      qht_destroy(&snap->ht);
792      g_free(snap);
793  }
794  
795  void qsp_reset(void)
796  {
797      QSPSnapshot *new = g_new(QSPSnapshot, 1);
798      QSPSnapshot *old;
799  
800      qsp_init();
801  
802      qht_init(&new->ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
803               QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
804  
805      /* take a snapshot of the current state */
806      qht_iter(&qsp_ht, qsp_aggregate, &new->ht);
807  
808      /* replace the previous snapshot, if any */
809      old = qatomic_xchg(&qsp_snapshot, new);
810      if (old) {
811          call_rcu(old, qsp_snapshot_destroy, rcu);
812      }
813  }
814