xref: /openbmc/qemu/util/qht.c (revision 68f7b2be)
1 /*
2  * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
3  *
4  * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
5  *
6  * License: GNU GPL, version 2 or later.
7  *   See the COPYING file in the top-level directory.
8  *
9  * Assumptions:
10  * - NULL cannot be inserted/removed as a pointer value.
11  * - Trying to insert an already-existing hash-pointer pair is OK. However,
12  *   it is not OK to insert into the same hash table different hash-pointer
13  *   pairs that have the same pointer value, but not the hashes.
14  * - Lookups are performed under an RCU read-critical section; removals
15  *   must wait for a grace period to elapse before freeing removed objects.
16  *
17  * Features:
18  * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
19  *   Lookups that are concurrent with writes to the same bucket will retry
20  *   via a seqlock; iterators acquire all bucket locks and therefore can be
21  *   concurrent with lookups and are serialized wrt writers.
22  * - Writes (i.e. insertions/removals) can be concurrent with writes to
23  *   different buckets; writes to the same bucket are serialized through a lock.
24  * - Optional auto-resizing: the hash table resizes up if the load surpasses
25  *   a certain threshold. Resizing is done concurrently with readers; writes
26  *   are serialized with the resize operation.
27  *
28  * The key structure is the bucket, which is cacheline-sized. Buckets
29  * contain a few hash values and pointers; the u32 hash values are stored in
30  * full so that resizing is fast. Having this structure instead of directly
31  * chaining items has two advantages:
32  * - Failed lookups fail fast, and touch a minimum number of cache lines.
33  * - Resizing the hash table with concurrent lookups is easy.
34  *
35  * There are two types of buckets:
36  * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
37  * 2. all "non-head" buckets (i.e. all others) are members of a chain that
38  *    starts from a head bucket.
39  * Note that the seqlock and spinlock of a head bucket applies to all buckets
40  * chained to it; these two fields are unused in non-head buckets.
41  *
42  * On removals, we move the last valid item in the chain to the position of the
43  * just-removed entry. This makes lookups slightly faster, since the moment an
44  * invalid entry is found, the (failed) lookup is over.
45  *
46  * Resizing is done by taking all bucket spinlocks (so that no other writers can
47  * race with us) and then copying all entries into a new hash map. Then, the
48  * ht->map pointer is set, and the old map is freed once no RCU readers can see
49  * it anymore.
50  *
51  * Writers check for concurrent resizes by comparing ht->map before and after
52  * acquiring their bucket lock. If they don't match, a resize has occurred
53  * while the bucket spinlock was being acquired.
54  *
55  * Related Work:
56  * - Idea of cacheline-sized buckets with full hashes taken from:
57  *   David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
58  *   The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
59  * - Why not RCU-based hash tables? They would allow us to get rid of the
60  *   seqlock, but resizing would take forever since RCU read critical
61  *   sections in QEMU take quite a long time.
62  *   More info on relativistic hash tables:
63  *   + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
64  *     Tables via Relativistic Programming", USENIX ATC'11.
65  *   + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
66  *     https://lwn.net/Articles/612021/
67  */
68 #include "qemu/osdep.h"
69 #include "qemu/qht.h"
70 #include "qemu/atomic.h"
71 #include "qemu/rcu.h"
72 #include "qemu/memalign.h"
73 
74 //#define QHT_DEBUG
75 
76 /*
77  * We want to avoid false sharing of cache lines. Most systems have 64-byte
78  * cache lines so we go with it for simplicity.
79  *
80  * Note that systems with smaller cache lines will be fine (the struct is
81  * almost 64-bytes); systems with larger cache lines might suffer from
82  * some false sharing.
83  */
84 #define QHT_BUCKET_ALIGN 64
85 
86 /* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
87 #if HOST_LONG_BITS == 32
88 #define QHT_BUCKET_ENTRIES 6
89 #else /* 64-bit */
90 #define QHT_BUCKET_ENTRIES 4
91 #endif
92 
93 enum qht_iter_type {
94     QHT_ITER_VOID,    /* do nothing; use retvoid */
95     QHT_ITER_RM,      /* remove element if retbool returns true */
96 };
97 
98 struct qht_iter {
99     union {
100         qht_iter_func_t retvoid;
101         qht_iter_bool_func_t retbool;
102     } f;
103     enum qht_iter_type type;
104 };
105 
106 /*
107  * Do _not_ use qemu_mutex_[try]lock directly! Use these macros, otherwise
108  * the profiler (QSP) will deadlock.
109  */
qht_lock(struct qht * ht)110 static inline void qht_lock(struct qht *ht)
111 {
112     if (ht->mode & QHT_MODE_RAW_MUTEXES) {
113         qemu_mutex_lock__raw(&ht->lock);
114     } else {
115         qemu_mutex_lock(&ht->lock);
116     }
117 }
118 
qht_trylock(struct qht * ht)119 static inline int qht_trylock(struct qht *ht)
120 {
121     if (ht->mode & QHT_MODE_RAW_MUTEXES) {
122         return qemu_mutex_trylock__raw(&(ht)->lock);
123     }
124     return qemu_mutex_trylock(&(ht)->lock);
125 }
126 
127 /* this inline is not really necessary, but it helps keep code consistent */
qht_unlock(struct qht * ht)128 static inline void qht_unlock(struct qht *ht)
129 {
130     qemu_mutex_unlock(&ht->lock);
131 }
132 
133 /*
134  * Note: reading partially-updated pointers in @pointers could lead to
135  * segfaults. We thus access them with qatomic_read/set; this guarantees
136  * that the compiler makes all those accesses atomic. We also need the
137  * volatile-like behavior in qatomic_read, since otherwise the compiler
138  * might refetch the pointer.
139  * qatomic_read's are of course not necessary when the bucket lock is held.
140  *
141  * If both ht->lock and b->lock are grabbed, ht->lock should always
142  * be grabbed first.
143  */
144 struct qht_bucket {
145     QemuSpin lock;
146     QemuSeqLock sequence;
147     uint32_t hashes[QHT_BUCKET_ENTRIES];
148     void *pointers[QHT_BUCKET_ENTRIES];
149     struct qht_bucket *next;
150 } QEMU_ALIGNED(QHT_BUCKET_ALIGN);
151 
152 QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
153 
154 /*
155  * Under TSAN, we use striped locks instead of one lock per bucket chain.
156  * This avoids crashing under TSAN, since TSAN aborts the program if more than
157  * 64 locks are held (this is a hardcoded limit in TSAN).
158  * When resizing a QHT we grab all the buckets' locks, which can easily
159  * go over TSAN's limit. By using striped locks, we avoid this problem.
160  *
161  * Note: this number must be a power of two for easy index computation.
162  */
163 #define QHT_TSAN_BUCKET_LOCKS_BITS 4
164 #define QHT_TSAN_BUCKET_LOCKS (1 << QHT_TSAN_BUCKET_LOCKS_BITS)
165 
166 struct qht_tsan_lock {
167     QemuSpin lock;
168 } QEMU_ALIGNED(QHT_BUCKET_ALIGN);
169 
170 /**
171  * struct qht_map - structure to track an array of buckets
172  * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
173  *       find the whole struct.
174  * @buckets: array of head buckets. It is constant once the map is created.
175  * @n_buckets: number of head buckets. It is constant once the map is created.
176  * @n_added_buckets: number of added (i.e. "non-head") buckets
177  * @n_added_buckets_threshold: threshold to trigger an upward resize once the
178  *                             number of added buckets surpasses it.
179  * @tsan_bucket_locks: Array of striped locks to be used only under TSAN.
180  *
181  * Buckets are tracked in what we call a "map", i.e. this structure.
182  */
183 struct qht_map {
184     struct rcu_head rcu;
185     struct qht_bucket *buckets;
186     size_t n_buckets;
187     size_t n_added_buckets;
188     size_t n_added_buckets_threshold;
189 #ifdef CONFIG_TSAN
190     struct qht_tsan_lock tsan_bucket_locks[QHT_TSAN_BUCKET_LOCKS];
191 #endif
192 };
193 
194 /* trigger a resize when n_added_buckets > n_buckets / div */
195 #define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8
196 
197 static void qht_do_resize_reset(struct qht *ht, struct qht_map *new,
198                                 bool reset);
199 static void qht_grow_maybe(struct qht *ht);
200 
201 #ifdef QHT_DEBUG
202 
203 #define qht_debug_assert(X) do { assert(X); } while (0)
204 
qht_bucket_debug__locked(struct qht_bucket * b)205 static void qht_bucket_debug__locked(struct qht_bucket *b)
206 {
207     bool seen_empty = false;
208     bool corrupt = false;
209     int i;
210 
211     do {
212         for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
213             if (b->pointers[i] == NULL) {
214                 seen_empty = true;
215                 continue;
216             }
217             if (seen_empty) {
218                 fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
219                         __func__, b, i, b->hashes[i], b->pointers[i]);
220                 corrupt = true;
221             }
222         }
223         b = b->next;
224     } while (b);
225     qht_debug_assert(!corrupt);
226 }
227 
qht_map_debug__all_locked(struct qht_map * map)228 static void qht_map_debug__all_locked(struct qht_map *map)
229 {
230     int i;
231 
232     for (i = 0; i < map->n_buckets; i++) {
233         qht_bucket_debug__locked(&map->buckets[i]);
234     }
235 }
236 #else
237 
238 #define qht_debug_assert(X) do { (void)(X); } while (0)
239 
qht_bucket_debug__locked(struct qht_bucket * b)240 static inline void qht_bucket_debug__locked(struct qht_bucket *b)
241 { }
242 
qht_map_debug__all_locked(struct qht_map * map)243 static inline void qht_map_debug__all_locked(struct qht_map *map)
244 { }
245 #endif /* QHT_DEBUG */
246 
qht_elems_to_buckets(size_t n_elems)247 static inline size_t qht_elems_to_buckets(size_t n_elems)
248 {
249     return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
250 }
251 
252 /*
253  * When using striped locks (i.e. under TSAN), we have to be careful not
254  * to operate on the same lock twice (e.g. when iterating through all buckets).
255  * We achieve this by operating only on each stripe's first matching lock.
256  */
qht_do_if_first_in_stripe(struct qht_map * map,struct qht_bucket * b,void (* func)(QemuSpin * spin))257 static inline void qht_do_if_first_in_stripe(struct qht_map *map,
258                                              struct qht_bucket *b,
259                                              void (*func)(QemuSpin *spin))
260 {
261 #ifdef CONFIG_TSAN
262     unsigned long bucket_idx = b - map->buckets;
263     bool is_first_in_stripe = (bucket_idx >> QHT_TSAN_BUCKET_LOCKS_BITS) == 0;
264     if (is_first_in_stripe) {
265         unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
266         func(&map->tsan_bucket_locks[lock_idx].lock);
267     }
268 #else
269     func(&b->lock);
270 #endif
271 }
272 
qht_bucket_lock_do(struct qht_map * map,struct qht_bucket * b,void (* func)(QemuSpin * lock))273 static inline void qht_bucket_lock_do(struct qht_map *map,
274                                       struct qht_bucket *b,
275                                       void (*func)(QemuSpin *lock))
276 {
277 #ifdef CONFIG_TSAN
278     unsigned long bucket_idx = b - map->buckets;
279     unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
280     func(&map->tsan_bucket_locks[lock_idx].lock);
281 #else
282     func(&b->lock);
283 #endif
284 }
285 
qht_bucket_lock(struct qht_map * map,struct qht_bucket * b)286 static inline void qht_bucket_lock(struct qht_map *map,
287                                    struct qht_bucket *b)
288 {
289     qht_bucket_lock_do(map, b, qemu_spin_lock);
290 }
291 
qht_bucket_unlock(struct qht_map * map,struct qht_bucket * b)292 static inline void qht_bucket_unlock(struct qht_map *map,
293                                      struct qht_bucket *b)
294 {
295     qht_bucket_lock_do(map, b, qemu_spin_unlock);
296 }
297 
qht_head_init(struct qht_map * map,struct qht_bucket * b)298 static inline void qht_head_init(struct qht_map *map, struct qht_bucket *b)
299 {
300     memset(b, 0, sizeof(*b));
301     qht_do_if_first_in_stripe(map, b, qemu_spin_init);
302     seqlock_init(&b->sequence);
303 }
304 
305 static inline
qht_map_to_bucket(const struct qht_map * map,uint32_t hash)306 struct qht_bucket *qht_map_to_bucket(const struct qht_map *map, uint32_t hash)
307 {
308     return &map->buckets[hash & (map->n_buckets - 1)];
309 }
310 
311 /* acquire all bucket locks from a map */
qht_map_lock_buckets(struct qht_map * map)312 static void qht_map_lock_buckets(struct qht_map *map)
313 {
314     size_t i;
315 
316     for (i = 0; i < map->n_buckets; i++) {
317         struct qht_bucket *b = &map->buckets[i];
318 
319         qht_do_if_first_in_stripe(map, b, qemu_spin_lock);
320     }
321 }
322 
qht_map_unlock_buckets(struct qht_map * map)323 static void qht_map_unlock_buckets(struct qht_map *map)
324 {
325     size_t i;
326 
327     for (i = 0; i < map->n_buckets; i++) {
328         struct qht_bucket *b = &map->buckets[i];
329 
330         qht_do_if_first_in_stripe(map, b, qemu_spin_unlock);
331     }
332 }
333 
334 /*
335  * Call with at least a bucket lock held.
336  * @map should be the value read before acquiring the lock (or locks).
337  */
qht_map_is_stale__locked(const struct qht * ht,const struct qht_map * map)338 static inline bool qht_map_is_stale__locked(const struct qht *ht,
339                                             const struct qht_map *map)
340 {
341     return map != ht->map;
342 }
343 
344 /*
345  * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
346  *
347  * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
348  *
349  * Note: callers cannot have ht->lock held.
350  */
351 static inline
qht_map_lock_buckets__no_stale(struct qht * ht,struct qht_map ** pmap)352 void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
353 {
354     struct qht_map *map;
355 
356     map = qatomic_rcu_read(&ht->map);
357     qht_map_lock_buckets(map);
358     if (likely(!qht_map_is_stale__locked(ht, map))) {
359         *pmap = map;
360         return;
361     }
362     qht_map_unlock_buckets(map);
363 
364     /* we raced with a resize; acquire ht->lock to see the updated ht->map */
365     qht_lock(ht);
366     map = ht->map;
367     qht_map_lock_buckets(map);
368     qht_unlock(ht);
369     *pmap = map;
370     return;
371 }
372 
373 /*
374  * Get a head bucket and lock it, making sure its parent map is not stale.
375  * @pmap is filled with a pointer to the bucket's parent map.
376  *
377  * Unlock with qht_bucket_unlock.
378  *
379  * Note: callers cannot have ht->lock held.
380  */
381 static inline
qht_bucket_lock__no_stale(struct qht * ht,uint32_t hash,struct qht_map ** pmap)382 struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
383                                              struct qht_map **pmap)
384 {
385     struct qht_bucket *b;
386     struct qht_map *map;
387 
388     map = qatomic_rcu_read(&ht->map);
389     b = qht_map_to_bucket(map, hash);
390 
391     qht_bucket_lock(map, b);
392     if (likely(!qht_map_is_stale__locked(ht, map))) {
393         *pmap = map;
394         return b;
395     }
396     qht_bucket_unlock(map, b);
397 
398     /* we raced with a resize; acquire ht->lock to see the updated ht->map */
399     qht_lock(ht);
400     map = ht->map;
401     b = qht_map_to_bucket(map, hash);
402     qht_bucket_lock(map, b);
403     qht_unlock(ht);
404     *pmap = map;
405     return b;
406 }
407 
qht_map_needs_resize(const struct qht_map * map)408 static inline bool qht_map_needs_resize(const struct qht_map *map)
409 {
410     return qatomic_read(&map->n_added_buckets) >
411            map->n_added_buckets_threshold;
412 }
413 
qht_chain_destroy(struct qht_map * map,struct qht_bucket * head)414 static inline void qht_chain_destroy(struct qht_map *map,
415                                      struct qht_bucket *head)
416 {
417     struct qht_bucket *curr = head->next;
418     struct qht_bucket *prev;
419 
420     qht_do_if_first_in_stripe(map, head, qemu_spin_destroy);
421     while (curr) {
422         prev = curr;
423         curr = curr->next;
424         qemu_vfree(prev);
425     }
426 }
427 
428 /* pass only an orphan map */
qht_map_destroy(struct qht_map * map)429 static void qht_map_destroy(struct qht_map *map)
430 {
431     size_t i;
432 
433     for (i = 0; i < map->n_buckets; i++) {
434         qht_chain_destroy(map, &map->buckets[i]);
435     }
436     qemu_vfree(map->buckets);
437     g_free(map);
438 }
439 
qht_map_create(size_t n_buckets)440 static struct qht_map *qht_map_create(size_t n_buckets)
441 {
442     struct qht_map *map;
443     size_t i;
444 
445     map = g_malloc(sizeof(*map));
446     map->n_buckets = n_buckets;
447 
448     map->n_added_buckets = 0;
449     map->n_added_buckets_threshold = n_buckets /
450         QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;
451 
452     /* let tiny hash tables to at least add one non-head bucket */
453     if (unlikely(map->n_added_buckets_threshold == 0)) {
454         map->n_added_buckets_threshold = 1;
455     }
456 
457     map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
458                                  sizeof(*map->buckets) * n_buckets);
459     for (i = 0; i < n_buckets; i++) {
460         qht_head_init(map, &map->buckets[i]);
461     }
462     return map;
463 }
464 
qht_init(struct qht * ht,qht_cmp_func_t cmp,size_t n_elems,unsigned int mode)465 void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
466               unsigned int mode)
467 {
468     struct qht_map *map;
469     size_t n_buckets = qht_elems_to_buckets(n_elems);
470 
471     g_assert(cmp);
472     ht->cmp = cmp;
473     ht->mode = mode;
474     qemu_mutex_init(&ht->lock);
475     map = qht_map_create(n_buckets);
476     qatomic_rcu_set(&ht->map, map);
477 }
478 
479 /* call only when there are no readers/writers left */
qht_destroy(struct qht * ht)480 void qht_destroy(struct qht *ht)
481 {
482     qht_map_destroy(ht->map);
483     memset(ht, 0, sizeof(*ht));
484 }
485 
qht_bucket_reset__locked(struct qht_bucket * head)486 static void qht_bucket_reset__locked(struct qht_bucket *head)
487 {
488     struct qht_bucket *b = head;
489     int i;
490 
491     seqlock_write_begin(&head->sequence);
492     do {
493         for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
494             if (b->pointers[i] == NULL) {
495                 goto done;
496             }
497             qatomic_set(&b->hashes[i], 0);
498             qatomic_set(&b->pointers[i], NULL);
499         }
500         b = b->next;
501     } while (b);
502  done:
503     seqlock_write_end(&head->sequence);
504 }
505 
506 /* call with all bucket locks held */
qht_map_reset__all_locked(struct qht_map * map)507 static void qht_map_reset__all_locked(struct qht_map *map)
508 {
509     size_t i;
510 
511     for (i = 0; i < map->n_buckets; i++) {
512         qht_bucket_reset__locked(&map->buckets[i]);
513     }
514     qht_map_debug__all_locked(map);
515 }
516 
qht_reset(struct qht * ht)517 void qht_reset(struct qht *ht)
518 {
519     struct qht_map *map;
520 
521     qht_map_lock_buckets__no_stale(ht, &map);
522     qht_map_reset__all_locked(map);
523     qht_map_unlock_buckets(map);
524 }
525 
qht_do_resize(struct qht * ht,struct qht_map * new)526 static inline void qht_do_resize(struct qht *ht, struct qht_map *new)
527 {
528     qht_do_resize_reset(ht, new, false);
529 }
530 
qht_do_resize_and_reset(struct qht * ht,struct qht_map * new)531 static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new)
532 {
533     qht_do_resize_reset(ht, new, true);
534 }
535 
qht_reset_size(struct qht * ht,size_t n_elems)536 bool qht_reset_size(struct qht *ht, size_t n_elems)
537 {
538     struct qht_map *new = NULL;
539     struct qht_map *map;
540     size_t n_buckets;
541 
542     n_buckets = qht_elems_to_buckets(n_elems);
543 
544     qht_lock(ht);
545     map = ht->map;
546     if (n_buckets != map->n_buckets) {
547         new = qht_map_create(n_buckets);
548     }
549     qht_do_resize_and_reset(ht, new);
550     qht_unlock(ht);
551 
552     return !!new;
553 }
554 
555 static inline
qht_do_lookup(const struct qht_bucket * head,qht_lookup_func_t func,const void * userp,uint32_t hash)556 void *qht_do_lookup(const struct qht_bucket *head, qht_lookup_func_t func,
557                     const void *userp, uint32_t hash)
558 {
559     const struct qht_bucket *b = head;
560     int i;
561 
562     do {
563         for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
564             if (qatomic_read(&b->hashes[i]) == hash) {
565                 /* The pointer is dereferenced before seqlock_read_retry,
566                  * so (unlike qht_insert__locked) we need to use
567                  * qatomic_rcu_read here.
568                  */
569                 void *p = qatomic_rcu_read(&b->pointers[i]);
570 
571                 if (likely(p) && likely(func(p, userp))) {
572                     return p;
573                 }
574             }
575         }
576         b = qatomic_rcu_read(&b->next);
577     } while (b);
578 
579     return NULL;
580 }
581 
582 static __attribute__((noinline))
qht_lookup__slowpath(const struct qht_bucket * b,qht_lookup_func_t func,const void * userp,uint32_t hash)583 void *qht_lookup__slowpath(const struct qht_bucket *b, qht_lookup_func_t func,
584                            const void *userp, uint32_t hash)
585 {
586     unsigned int version;
587     void *ret;
588 
589     do {
590         version = seqlock_read_begin(&b->sequence);
591         ret = qht_do_lookup(b, func, userp, hash);
592     } while (seqlock_read_retry(&b->sequence, version));
593     return ret;
594 }
595 
qht_lookup_custom(const struct qht * ht,const void * userp,uint32_t hash,qht_lookup_func_t func)596 void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash,
597                         qht_lookup_func_t func)
598 {
599     const struct qht_bucket *b;
600     const struct qht_map *map;
601     unsigned int version;
602     void *ret;
603 
604     map = qatomic_rcu_read(&ht->map);
605     b = qht_map_to_bucket(map, hash);
606 
607     version = seqlock_read_begin(&b->sequence);
608     ret = qht_do_lookup(b, func, userp, hash);
609     if (likely(!seqlock_read_retry(&b->sequence, version))) {
610         return ret;
611     }
612     /*
613      * Removing the do/while from the fastpath gives a 4% perf. increase when
614      * running a 100%-lookup microbenchmark.
615      */
616     return qht_lookup__slowpath(b, func, userp, hash);
617 }
618 
qht_lookup(const struct qht * ht,const void * userp,uint32_t hash)619 void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash)
620 {
621     return qht_lookup_custom(ht, userp, hash, ht->cmp);
622 }
623 
624 /*
625  * call with head->lock held
626  * @ht is const since it is only used for ht->cmp()
627  */
qht_insert__locked(const struct qht * ht,struct qht_map * map,struct qht_bucket * head,void * p,uint32_t hash,bool * needs_resize)628 static void *qht_insert__locked(const struct qht *ht, struct qht_map *map,
629                                 struct qht_bucket *head, void *p, uint32_t hash,
630                                 bool *needs_resize)
631 {
632     struct qht_bucket *b = head;
633     struct qht_bucket *prev = NULL;
634     struct qht_bucket *new = NULL;
635     int i;
636 
637     do {
638         for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
639             if (b->pointers[i]) {
640                 if (unlikely(b->hashes[i] == hash &&
641                              ht->cmp(b->pointers[i], p))) {
642                     return b->pointers[i];
643                 }
644             } else {
645                 goto found;
646             }
647         }
648         prev = b;
649         b = b->next;
650     } while (b);
651 
652     b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
653     memset(b, 0, sizeof(*b));
654     new = b;
655     i = 0;
656     qatomic_inc(&map->n_added_buckets);
657     if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
658         *needs_resize = true;
659     }
660 
661  found:
662     /* found an empty key: acquire the seqlock and write */
663     seqlock_write_begin(&head->sequence);
664     if (new) {
665         qatomic_rcu_set(&prev->next, b);
666     }
667     /* smp_wmb() implicit in seqlock_write_begin.  */
668     qatomic_set(&b->hashes[i], hash);
669     qatomic_set(&b->pointers[i], p);
670     seqlock_write_end(&head->sequence);
671     return NULL;
672 }
673 
qht_grow_maybe(struct qht * ht)674 static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
675 {
676     struct qht_map *map;
677 
678     /*
679      * If the lock is taken it probably means there's an ongoing resize,
680      * so bail out.
681      */
682     if (qht_trylock(ht)) {
683         return;
684     }
685     map = ht->map;
686     /* another thread might have just performed the resize we were after */
687     if (qht_map_needs_resize(map)) {
688         struct qht_map *new = qht_map_create(map->n_buckets * 2);
689 
690         qht_do_resize(ht, new);
691     }
692     qht_unlock(ht);
693 }
694 
qht_insert(struct qht * ht,void * p,uint32_t hash,void ** existing)695 bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
696 {
697     struct qht_bucket *b;
698     struct qht_map *map;
699     bool needs_resize = false;
700     void *prev;
701 
702     /* NULL pointers are not supported */
703     qht_debug_assert(p);
704 
705     b = qht_bucket_lock__no_stale(ht, hash, &map);
706     prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
707     qht_bucket_debug__locked(b);
708     qht_bucket_unlock(map, b);
709 
710     if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
711         qht_grow_maybe(ht);
712     }
713     if (likely(prev == NULL)) {
714         return true;
715     }
716     if (existing) {
717         *existing = prev;
718     }
719     return false;
720 }
721 
qht_entry_is_last(const struct qht_bucket * b,int pos)722 static inline bool qht_entry_is_last(const struct qht_bucket *b, int pos)
723 {
724     if (pos == QHT_BUCKET_ENTRIES - 1) {
725         if (b->next == NULL) {
726             return true;
727         }
728         return b->next->pointers[0] == NULL;
729     }
730     return b->pointers[pos + 1] == NULL;
731 }
732 
733 static void
qht_entry_move(struct qht_bucket * to,int i,struct qht_bucket * from,int j)734 qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
735 {
736     qht_debug_assert(!(to == from && i == j));
737     qht_debug_assert(to->pointers[i]);
738     qht_debug_assert(from->pointers[j]);
739 
740     qatomic_set(&to->hashes[i], from->hashes[j]);
741     qatomic_set(&to->pointers[i], from->pointers[j]);
742 
743     qatomic_set(&from->hashes[j], 0);
744     qatomic_set(&from->pointers[j], NULL);
745 }
746 
747 /*
748  * Find the last valid entry in @orig, and swap it with @orig[pos], which has
749  * just been invalidated.
750  */
qht_bucket_remove_entry(struct qht_bucket * orig,int pos)751 static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
752 {
753     struct qht_bucket *b = orig;
754     struct qht_bucket *prev = NULL;
755     int i;
756 
757     if (qht_entry_is_last(orig, pos)) {
758         qatomic_set(&orig->hashes[pos], 0);
759         qatomic_set(&orig->pointers[pos], NULL);
760         return;
761     }
762     do {
763         for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
764             if (b->pointers[i]) {
765                 continue;
766             }
767             if (i > 0) {
768                 return qht_entry_move(orig, pos, b, i - 1);
769             }
770             qht_debug_assert(prev);
771             return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
772         }
773         prev = b;
774         b = b->next;
775     } while (b);
776     /* no free entries other than orig[pos], so swap it with the last one */
777     qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
778 }
779 
780 /* call with b->lock held */
781 static inline
qht_remove__locked(struct qht_bucket * head,const void * p,uint32_t hash)782 bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash)
783 {
784     struct qht_bucket *b = head;
785     int i;
786 
787     do {
788         for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
789             void *q = b->pointers[i];
790 
791             if (unlikely(q == NULL)) {
792                 return false;
793             }
794             if (q == p) {
795                 qht_debug_assert(b->hashes[i] == hash);
796                 seqlock_write_begin(&head->sequence);
797                 qht_bucket_remove_entry(b, i);
798                 seqlock_write_end(&head->sequence);
799                 return true;
800             }
801         }
802         b = b->next;
803     } while (b);
804     return false;
805 }
806 
qht_remove(struct qht * ht,const void * p,uint32_t hash)807 bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
808 {
809     struct qht_bucket *b;
810     struct qht_map *map;
811     bool ret;
812 
813     /* NULL pointers are not supported */
814     qht_debug_assert(p);
815 
816     b = qht_bucket_lock__no_stale(ht, hash, &map);
817     ret = qht_remove__locked(b, p, hash);
818     qht_bucket_debug__locked(b);
819     qht_bucket_unlock(map, b);
820     return ret;
821 }
822 
qht_bucket_iter(struct qht_bucket * head,const struct qht_iter * iter,void * userp)823 static inline void qht_bucket_iter(struct qht_bucket *head,
824                                    const struct qht_iter *iter, void *userp)
825 {
826     struct qht_bucket *b = head;
827     int i;
828 
829     do {
830         for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
831             if (b->pointers[i] == NULL) {
832                 return;
833             }
834             switch (iter->type) {
835             case QHT_ITER_VOID:
836                 iter->f.retvoid(b->pointers[i], b->hashes[i], userp);
837                 break;
838             case QHT_ITER_RM:
839                 if (iter->f.retbool(b->pointers[i], b->hashes[i], userp)) {
840                     /* replace i with the last valid element in the bucket */
841                     seqlock_write_begin(&head->sequence);
842                     qht_bucket_remove_entry(b, i);
843                     seqlock_write_end(&head->sequence);
844                     qht_bucket_debug__locked(b);
845                     /* reevaluate i, since it just got replaced */
846                     i--;
847                     continue;
848                 }
849                 break;
850             default:
851                 g_assert_not_reached();
852             }
853         }
854         b = b->next;
855     } while (b);
856 }
857 
858 /* call with all of the map's locks held */
qht_map_iter__all_locked(struct qht_map * map,const struct qht_iter * iter,void * userp)859 static inline void qht_map_iter__all_locked(struct qht_map *map,
860                                             const struct qht_iter *iter,
861                                             void *userp)
862 {
863     size_t i;
864 
865     for (i = 0; i < map->n_buckets; i++) {
866         qht_bucket_iter(&map->buckets[i], iter, userp);
867     }
868 }
869 
870 static inline void
do_qht_iter(struct qht * ht,const struct qht_iter * iter,void * userp)871 do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp)
872 {
873     struct qht_map *map;
874 
875     map = qatomic_rcu_read(&ht->map);
876     qht_map_lock_buckets(map);
877     qht_map_iter__all_locked(map, iter, userp);
878     qht_map_unlock_buckets(map);
879 }
880 
qht_iter(struct qht * ht,qht_iter_func_t func,void * userp)881 void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
882 {
883     const struct qht_iter iter = {
884         .f.retvoid = func,
885         .type = QHT_ITER_VOID,
886     };
887 
888     do_qht_iter(ht, &iter, userp);
889 }
890 
qht_iter_remove(struct qht * ht,qht_iter_bool_func_t func,void * userp)891 void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp)
892 {
893     const struct qht_iter iter = {
894         .f.retbool = func,
895         .type = QHT_ITER_RM,
896     };
897 
898     do_qht_iter(ht, &iter, userp);
899 }
900 
901 struct qht_map_copy_data {
902     struct qht *ht;
903     struct qht_map *new;
904 };
905 
qht_map_copy(void * p,uint32_t hash,void * userp)906 static void qht_map_copy(void *p, uint32_t hash, void *userp)
907 {
908     struct qht_map_copy_data *data = userp;
909     struct qht *ht = data->ht;
910     struct qht_map *new = data->new;
911     struct qht_bucket *b = qht_map_to_bucket(new, hash);
912 
913     /* no need to acquire b->lock because no thread has seen this map yet */
914     qht_insert__locked(ht, new, b, p, hash, NULL);
915 }
916 
917 /*
918  * Atomically perform a resize and/or reset.
919  * Call with ht->lock held.
920  */
qht_do_resize_reset(struct qht * ht,struct qht_map * new,bool reset)921 static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset)
922 {
923     struct qht_map *old;
924     const struct qht_iter iter = {
925         .f.retvoid = qht_map_copy,
926         .type = QHT_ITER_VOID,
927     };
928     struct qht_map_copy_data data;
929 
930     old = ht->map;
931     qht_map_lock_buckets(old);
932 
933     if (reset) {
934         qht_map_reset__all_locked(old);
935     }
936 
937     if (new == NULL) {
938         qht_map_unlock_buckets(old);
939         return;
940     }
941 
942     g_assert(new->n_buckets != old->n_buckets);
943     data.ht = ht;
944     data.new = new;
945     qht_map_iter__all_locked(old, &iter, &data);
946     qht_map_debug__all_locked(new);
947 
948     qatomic_rcu_set(&ht->map, new);
949     qht_map_unlock_buckets(old);
950     call_rcu(old, qht_map_destroy, rcu);
951 }
952 
qht_resize(struct qht * ht,size_t n_elems)953 bool qht_resize(struct qht *ht, size_t n_elems)
954 {
955     size_t n_buckets = qht_elems_to_buckets(n_elems);
956     size_t ret = false;
957 
958     qht_lock(ht);
959     if (n_buckets != ht->map->n_buckets) {
960         struct qht_map *new;
961 
962         new = qht_map_create(n_buckets);
963         qht_do_resize(ht, new);
964         ret = true;
965     }
966     qht_unlock(ht);
967 
968     return ret;
969 }
970 
971 /* pass @stats to qht_statistics_destroy() when done */
qht_statistics_init(const struct qht * ht,struct qht_stats * stats)972 void qht_statistics_init(const struct qht *ht, struct qht_stats *stats)
973 {
974     const struct qht_map *map;
975     int i;
976 
977     map = qatomic_rcu_read(&ht->map);
978 
979     stats->used_head_buckets = 0;
980     stats->entries = 0;
981     qdist_init(&stats->chain);
982     qdist_init(&stats->occupancy);
983     /* bail out if the qht has not yet been initialized */
984     if (unlikely(map == NULL)) {
985         stats->head_buckets = 0;
986         return;
987     }
988     stats->head_buckets = map->n_buckets;
989 
990     for (i = 0; i < map->n_buckets; i++) {
991         const struct qht_bucket *head = &map->buckets[i];
992         const struct qht_bucket *b;
993         unsigned int version;
994         size_t buckets;
995         size_t entries;
996         int j;
997 
998         do {
999             version = seqlock_read_begin(&head->sequence);
1000             buckets = 0;
1001             entries = 0;
1002             b = head;
1003             do {
1004                 for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
1005                     if (qatomic_read(&b->pointers[j]) == NULL) {
1006                         break;
1007                     }
1008                     entries++;
1009                 }
1010                 buckets++;
1011                 b = qatomic_rcu_read(&b->next);
1012             } while (b);
1013         } while (seqlock_read_retry(&head->sequence, version));
1014 
1015         if (entries) {
1016             qdist_inc(&stats->chain, buckets);
1017             qdist_inc(&stats->occupancy,
1018                       (double)entries / QHT_BUCKET_ENTRIES / buckets);
1019             stats->used_head_buckets++;
1020             stats->entries += entries;
1021         } else {
1022             qdist_inc(&stats->occupancy, 0);
1023         }
1024     }
1025 }
1026 
qht_statistics_destroy(struct qht_stats * stats)1027 void qht_statistics_destroy(struct qht_stats *stats)
1028 {
1029     qdist_destroy(&stats->occupancy);
1030     qdist_destroy(&stats->chain);
1031 }
1032