/*
 * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
 *
 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 *
 * Assumptions:
 * - NULL cannot be inserted/removed as a pointer value.
 * - Trying to insert an already-existing hash-pointer pair is OK. However,
 *   it is not OK to insert into the same hash table hash-pointer pairs that
 *   have the same pointer value but different hashes.
 * - Lookups are performed under an RCU read-critical section; removals
 *   must wait for a grace period to elapse before freeing removed objects.
 *
 * Features:
 * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
 *   Lookups that are concurrent with writes to the same bucket will retry
 *   via a seqlock; iterators acquire all bucket locks and therefore can be
 *   concurrent with lookups and are serialized wrt writers.
 * - Writes (i.e. insertions/removals) can be concurrent with writes to
 *   different buckets; writes to the same bucket are serialized through a lock.
 * - Optional auto-resizing: the hash table resizes up if the load surpasses
 *   a certain threshold. Resizing is done concurrently with readers; writes
 *   are serialized with the resize operation.
 *
 * The key structure is the bucket, which is cacheline-sized. Buckets
 * contain a few hash values and pointers; the u32 hash values are stored in
 * full so that resizing is fast. Having this structure instead of directly
 * chaining items has two advantages:
 * - Failed lookups fail fast, and touch a minimum number of cache lines.
 * - Resizing the hash table with concurrent lookups is easy.
 *
 * There are two types of buckets:
 * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
 * 2. all "non-head" buckets (i.e. all others) are members of a chain that
 *    starts from a head bucket.
 * Note that the seqlock and spinlock of a head bucket apply to all buckets
 * chained to it; these two fields are unused in non-head buckets.
 *
 * On removals, we move the last valid item in the chain to the position of the
 * just-removed entry. This makes lookups slightly faster, since the moment an
 * invalid entry is found, the (failed) lookup is over.
 *
 * Resizing is done by taking all bucket spinlocks (so that no other writers can
 * race with us) and then copying all entries into a new hash map. Then, the
 * ht->map pointer is set, and the old map is freed once no RCU readers can see
 * it anymore.
 *
 * Writers check for concurrent resizes by comparing ht->map before and after
 * acquiring their bucket lock. If they don't match, a resize has occurred
 * while the bucket spinlock was being acquired.
 *
 * Related Work:
 * - Idea of cacheline-sized buckets with full hashes taken from:
 *   David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
 *   The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
 * - Why not RCU-based hash tables? They would allow us to get rid of the
 *   seqlock, but resizing would take forever since RCU read critical
 *   sections in QEMU take quite a long time.
 *   More info on relativistic hash tables:
 *   + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
 *     Tables via Relativistic Programming", USENIX ATC'11.
 *   + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
 *     https://lwn.net/Articles/612021/
 */
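
/*
 * Example usage -- a minimal sketch of the public API (declared in
 * include/qemu/qht.h). struct my_entry, my_cmp() and my_hash() below are
 * illustrative helpers, not part of QHT itself:
 *
 *   struct my_entry {
 *       uint32_t key;
 *       uint32_t val;
 *   };
 *
 *   // compare two stored entries; used by qht_lookup() and qht_insert()
 *   static bool my_cmp(const void *a, const void *b)
 *   {
 *       const struct my_entry *ea = a;
 *       const struct my_entry *eb = b;
 *
 *       return ea->key == eb->key;
 *   }
 *
 *   // my_hash() stands for any well-distributed 32-bit hash of the key
 *   struct qht ht;
 *   struct my_entry *e = g_new0(struct my_entry, 1);
 *   void *existing;
 *
 *   qht_init(&ht, my_cmp, 1 << 16, QHT_MODE_AUTO_RESIZE);
 *   if (!qht_insert(&ht, e, my_hash(e->key), &existing)) {
 *       // an equivalent entry was already present; @existing points to it
 *   }
 *
 *   // lookups must run inside an RCU read-side critical section
 *   rcu_read_lock();
 *   struct my_entry *found = qht_lookup(&ht, e, my_hash(e->key));
 *   rcu_read_unlock();
 *
 *   // remove; free @e only after a grace period, e.g. via call_rcu()
 *   qht_remove(&ht, e, my_hash(e->key));
 */
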
#include "qemu/osdep.h"
#include "qemu/qht.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"

//#define QHT_DEBUG

/*
 * We want to avoid false sharing of cache lines. Most systems have 64-byte
 * cache lines so we go with it for simplicity.
 *
 * Note that systems with smaller cache lines will be fine (the struct is
 * almost 64 bytes); systems with larger cache lines might suffer from
 * some false sharing.
 */
#define QHT_BUCKET_ALIGN 64

/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
#if HOST_LONG_BITS == 32
#define QHT_BUCKET_ENTRIES 6
#else /* 64-bit */
#define QHT_BUCKET_ENTRIES 4
#endif
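
/*
 * Sizing sketch (assuming 4-byte QemuSpin and QemuSeqLock, and no extra
 * padding): on 64-bit hosts a bucket packs 4 + 4 + 4*4 (hashes) +
 * 4*8 (pointers) + 8 (next) = 64 bytes; on 32-bit hosts, 4 + 4 +
 * 6*4 (hashes) + 6*4 (pointers) + 4 (next) = 60 bytes. Either way the
 * QEMU_BUILD_BUG_ON below struct qht_bucket keeps us honest.
 */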

enum qht_iter_type {
    QHT_ITER_VOID,    /* do nothing; use retvoid */
    QHT_ITER_RM,      /* remove element if retbool returns true */
};

struct qht_iter {
    union {
        qht_iter_func_t retvoid;
        qht_iter_bool_func_t retbool;
    } f;
    enum qht_iter_type type;
};
10469d55e9cSEmilio G. Cota 
1052e11264aSEmilio G. Cota /*
106fe9959a2SEmilio G. Cota  * Do _not_ use qemu_mutex_[try]lock directly! Use these macros, otherwise
107fe9959a2SEmilio G. Cota  * the profiler (QSP) will deadlock.
108fe9959a2SEmilio G. Cota  */
109fe9959a2SEmilio G. Cota static inline void qht_lock(struct qht *ht)
110fe9959a2SEmilio G. Cota {
111fe9959a2SEmilio G. Cota     if (ht->mode & QHT_MODE_RAW_MUTEXES) {
112fe9959a2SEmilio G. Cota         qemu_mutex_lock__raw(&ht->lock);
113fe9959a2SEmilio G. Cota     } else {
114fe9959a2SEmilio G. Cota         qemu_mutex_lock(&ht->lock);
115fe9959a2SEmilio G. Cota     }
116fe9959a2SEmilio G. Cota }
117fe9959a2SEmilio G. Cota 
118fe9959a2SEmilio G. Cota static inline int qht_trylock(struct qht *ht)
119fe9959a2SEmilio G. Cota {
120fe9959a2SEmilio G. Cota     if (ht->mode & QHT_MODE_RAW_MUTEXES) {
121fe9959a2SEmilio G. Cota         return qemu_mutex_trylock__raw(&(ht)->lock);
122fe9959a2SEmilio G. Cota     }
123fe9959a2SEmilio G. Cota     return qemu_mutex_trylock(&(ht)->lock);
124fe9959a2SEmilio G. Cota }
125fe9959a2SEmilio G. Cota 
126fe9959a2SEmilio G. Cota /* this inline is not really necessary, but it helps keep code consistent */
127fe9959a2SEmilio G. Cota static inline void qht_unlock(struct qht *ht)
128fe9959a2SEmilio G. Cota {
129fe9959a2SEmilio G. Cota     qemu_mutex_unlock(&ht->lock);
130fe9959a2SEmilio G. Cota }

/*
 * Note: reading partially-updated pointers in @pointers could lead to
 * segfaults. We thus access them with atomic_read/set; this guarantees
 * that the compiler makes all those accesses atomic. We also need the
 * volatile-like behavior in atomic_read, since otherwise the compiler
 * might refetch the pointer.
 * atomic_read's are of course not necessary when the bucket lock is held.
 *
 * If both ht->lock and b->lock are grabbed, ht->lock should always
 * be grabbed first.
 */
struct qht_bucket {
    QemuSpin lock;
    QemuSeqLock sequence;
    uint32_t hashes[QHT_BUCKET_ENTRIES];
    void *pointers[QHT_BUCKET_ENTRIES];
    struct qht_bucket *next;
} QEMU_ALIGNED(QHT_BUCKET_ALIGN);

QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);

/**
 * struct qht_map - structure to track an array of buckets
 * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
 *       find the whole struct.
 * @buckets: array of head buckets. It is constant once the map is created.
 * @n_buckets: number of head buckets. It is constant once the map is created.
 * @n_added_buckets: number of added (i.e. "non-head") buckets
 * @n_added_buckets_threshold: threshold to trigger an upward resize once the
 *                             number of added buckets surpasses it.
 *
 * Buckets are tracked in what we call a "map", i.e. this structure.
 */
struct qht_map {
    struct rcu_head rcu;
    struct qht_bucket *buckets;
    size_t n_buckets;
    size_t n_added_buckets;
    size_t n_added_buckets_threshold;
};

/* trigger a resize when n_added_buckets > n_buckets / div */
#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8
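
/*
 * Worked example (sketch): with 256 head buckets the threshold is
 * 256 / 8 = 32, so chaining the 33rd non-head bucket makes
 * qht_map_needs_resize() return true.
 */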

static void qht_do_resize_reset(struct qht *ht, struct qht_map *new,
                                bool reset);
static void qht_grow_maybe(struct qht *ht);

#ifdef QHT_DEBUG

#define qht_debug_assert(X) do { assert(X); } while (0)

static void qht_bucket_debug__locked(struct qht_bucket *b)
{
    bool seen_empty = false;
    bool corrupt = false;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                seen_empty = true;
                continue;
            }
            if (seen_empty) {
                fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
                        __func__, b, i, b->hashes[i], b->pointers[i]);
                corrupt = true;
            }
        }
        b = b->next;
    } while (b);
    qht_debug_assert(!corrupt);
}

static void qht_map_debug__all_locked(struct qht_map *map)
{
    int i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_debug__locked(&map->buckets[i]);
    }
}
#else

#define qht_debug_assert(X) do { (void)(X); } while (0)

static inline void qht_bucket_debug__locked(struct qht_bucket *b)
{ }

static inline void qht_map_debug__all_locked(struct qht_map *map)
{ }
#endif /* QHT_DEBUG */

static inline size_t qht_elems_to_buckets(size_t n_elems)
{
    return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
}
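
/*
 * Worked example (sketch): on a 64-bit host, n_elems = 1000 gives
 * 1000 / 4 = 250, and pow2ceil(250) = 256 head buckets.
 */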

static inline void qht_head_init(struct qht_bucket *b)
{
    memset(b, 0, sizeof(*b));
    qemu_spin_init(&b->lock);
    seqlock_init(&b->sequence);
}

static inline
struct qht_bucket *qht_map_to_bucket(const struct qht_map *map, uint32_t hash)
{
    return &map->buckets[hash & (map->n_buckets - 1)];
}

/* acquire all bucket locks from a map */
static void qht_map_lock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_lock(&b->lock);
    }
}

static void qht_map_unlock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_unlock(&b->lock);
    }
}

/*
 * Call with at least a bucket lock held.
 * @map should be the value read before acquiring the lock (or locks).
 */
static inline bool qht_map_is_stale__locked(struct qht *ht, struct qht_map *map)
{
    return map != ht->map;
}

/*
 * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
 *
 * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return;
    }
    qht_map_unlock_buckets(map);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qht_lock(ht);
    map = ht->map;
    qht_map_lock_buckets(map);
    qht_unlock(ht);
    *pmap = map;
}

/*
 * Get a head bucket and lock it, making sure its parent map is not stale.
 * @pmap is filled with a pointer to the bucket's parent map.
 *
 * Unlock with qemu_spin_unlock(&b->lock).
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
                                             struct qht_map **pmap)
{
    struct qht_bucket *b;
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    qemu_spin_lock(&b->lock);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return b;
    }
    qemu_spin_unlock(&b->lock);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qht_lock(ht);
    map = ht->map;
    b = qht_map_to_bucket(map, hash);
    qemu_spin_lock(&b->lock);
    qht_unlock(ht);
    *pmap = map;
    return b;
}

static inline bool qht_map_needs_resize(struct qht_map *map)
{
    return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold;
}

static inline void qht_chain_destroy(struct qht_bucket *head)
{
    struct qht_bucket *curr = head->next;
    struct qht_bucket *prev;

    while (curr) {
        prev = curr;
        curr = curr->next;
        qemu_vfree(prev);
    }
}

/* pass only an orphan map */
static void qht_map_destroy(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_chain_destroy(&map->buckets[i]);
    }
    qemu_vfree(map->buckets);
    g_free(map);
}

static struct qht_map *qht_map_create(size_t n_buckets)
{
    struct qht_map *map;
    size_t i;

    map = g_malloc(sizeof(*map));
    map->n_buckets = n_buckets;

    map->n_added_buckets = 0;
    map->n_added_buckets_threshold = n_buckets /
        QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;

    /* let tiny hash tables add at least one non-head bucket */
    if (unlikely(map->n_added_buckets_threshold == 0)) {
        map->n_added_buckets_threshold = 1;
    }

    map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
                                 sizeof(*map->buckets) * n_buckets);
    for (i = 0; i < n_buckets; i++) {
        qht_head_init(&map->buckets[i]);
    }
    return map;
}

void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
              unsigned int mode)
{
    struct qht_map *map;
    size_t n_buckets = qht_elems_to_buckets(n_elems);

    g_assert(cmp);
    ht->cmp = cmp;
    ht->mode = mode;
    qemu_mutex_init(&ht->lock);
    map = qht_map_create(n_buckets);
    atomic_rcu_set(&ht->map, map);
}

/* call only when there are no readers/writers left */
void qht_destroy(struct qht *ht)
{
    qht_map_destroy(ht->map);
    memset(ht, 0, sizeof(*ht));
}

static void qht_bucket_reset__locked(struct qht_bucket *head)
{
    struct qht_bucket *b = head;
    int i;

    seqlock_write_begin(&head->sequence);
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                goto done;
            }
            atomic_set(&b->hashes[i], 0);
            atomic_set(&b->pointers[i], NULL);
        }
        b = b->next;
    } while (b);
 done:
    seqlock_write_end(&head->sequence);
}

/* call with all bucket locks held */
static void qht_map_reset__all_locked(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_reset__locked(&map->buckets[i]);
    }
    qht_map_debug__all_locked(map);
}

void qht_reset(struct qht *ht)
{
    struct qht_map *map;

    qht_map_lock_buckets__no_stale(ht, &map);
    qht_map_reset__all_locked(map);
    qht_map_unlock_buckets(map);
}

static inline void qht_do_resize(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, false);
}

static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, true);
}

bool qht_reset_size(struct qht *ht, size_t n_elems)
{
    struct qht_map *new = NULL;
    struct qht_map *map;
    size_t n_buckets;

    n_buckets = qht_elems_to_buckets(n_elems);

    qht_lock(ht);
    map = ht->map;
    if (n_buckets != map->n_buckets) {
        new = qht_map_create(n_buckets);
    }
    qht_do_resize_and_reset(ht, new);
    qht_unlock(ht);

    return !!new;
}

static inline
void *qht_do_lookup(const struct qht_bucket *head, qht_lookup_func_t func,
                    const void *userp, uint32_t hash)
{
    const struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (atomic_read(&b->hashes[i]) == hash) {
                /* The pointer is dereferenced before seqlock_read_retry,
                 * so (unlike qht_insert__locked) we need to use
                 * atomic_rcu_read here.
                 */
                void *p = atomic_rcu_read(&b->pointers[i]);

                if (likely(p) && likely(func(p, userp))) {
                    return p;
                }
            }
        }
        b = atomic_rcu_read(&b->next);
    } while (b);

    return NULL;
}

static __attribute__((noinline))
void *qht_lookup__slowpath(const struct qht_bucket *b, qht_lookup_func_t func,
                           const void *userp, uint32_t hash)
{
    unsigned int version;
    void *ret;

    do {
        version = seqlock_read_begin(&b->sequence);
        ret = qht_do_lookup(b, func, userp, hash);
    } while (seqlock_read_retry(&b->sequence, version));
    return ret;
}

void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash,
                        qht_lookup_func_t func)
{
    const struct qht_bucket *b;
    const struct qht_map *map;
    unsigned int version;
    void *ret;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    version = seqlock_read_begin(&b->sequence);
    ret = qht_do_lookup(b, func, userp, hash);
    if (likely(!seqlock_read_retry(&b->sequence, version))) {
        return ret;
    }
    /*
     * Removing the do/while from the fastpath gives a 4% perf. increase when
     * running a 100%-lookup microbenchmark.
     */
    return qht_lookup__slowpath(b, func, userp, hash);
}

void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash)
{
    return qht_lookup_custom(ht, userp, hash, ht->cmp);
}

/* call with head->lock held */
static void *qht_insert__locked(struct qht *ht, struct qht_map *map,
                                struct qht_bucket *head, void *p, uint32_t hash,
                                bool *needs_resize)
{
    struct qht_bucket *b = head;
    struct qht_bucket *prev = NULL;
    struct qht_bucket *new = NULL;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                if (unlikely(b->hashes[i] == hash &&
                             ht->cmp(b->pointers[i], p))) {
                    return b->pointers[i];
                }
            } else {
                goto found;
            }
        }
        prev = b;
        b = b->next;
    } while (b);

    b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
    memset(b, 0, sizeof(*b));
    new = b;
    i = 0;
    atomic_inc(&map->n_added_buckets);
    if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
        *needs_resize = true;
    }

 found:
    /* found an empty slot: acquire the seqlock and write */
    seqlock_write_begin(&head->sequence);
    if (new) {
        atomic_rcu_set(&prev->next, b);
    }
    /* smp_wmb() implicit in seqlock_write_begin.  */
    atomic_set(&b->hashes[i], hash);
    atomic_set(&b->pointers[i], p);
    seqlock_write_end(&head->sequence);
    return NULL;
}

static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
{
    struct qht_map *map;

    /*
     * If the lock is taken it probably means there's an ongoing resize,
     * so bail out.
     */
    if (qht_trylock(ht)) {
        return;
    }
    map = ht->map;
    /* another thread might have just performed the resize we were after */
    if (qht_map_needs_resize(map)) {
        struct qht_map *new = qht_map_create(map->n_buckets * 2);

        qht_do_resize(ht, new);
    }
    qht_unlock(ht);
}

bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool needs_resize = false;
    void *prev;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);

    if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
        qht_grow_maybe(ht);
    }
    if (likely(prev == NULL)) {
        return true;
    }
    if (existing) {
        *existing = prev;
    }
    return false;
}

static inline bool qht_entry_is_last(struct qht_bucket *b, int pos)
{
    if (pos == QHT_BUCKET_ENTRIES - 1) {
        if (b->next == NULL) {
            return true;
        }
        return b->next->pointers[0] == NULL;
    }
    return b->pointers[pos + 1] == NULL;
}

static void
qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
{
    qht_debug_assert(!(to == from && i == j));
    qht_debug_assert(to->pointers[i]);
    qht_debug_assert(from->pointers[j]);

    atomic_set(&to->hashes[i], from->hashes[j]);
    atomic_set(&to->pointers[i], from->pointers[j]);

    atomic_set(&from->hashes[j], 0);
    atomic_set(&from->pointers[j], NULL);
}

/*
 * Find the last valid entry in @orig, and move it into @orig[pos], which has
 * just been invalidated.
 */
static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
{
    struct qht_bucket *b = orig;
    struct qht_bucket *prev = NULL;
    int i;

    if (qht_entry_is_last(orig, pos)) {
        orig->hashes[pos] = 0;
        atomic_set(&orig->pointers[pos], NULL);
        return;
    }
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                continue;
            }
            if (i > 0) {
                return qht_entry_move(orig, pos, b, i - 1);
            }
            qht_debug_assert(prev);
            return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
        }
        prev = b;
        b = b->next;
    } while (b);
    /* no free entries other than orig[pos], so swap it with the last one */
    qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
}
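
/*
 * Illustration (sketch, QHT_BUCKET_ENTRIES == 4): removing B from
 *   head [ A B C D ] -> next [ E . . . ]
 * moves the last valid entry, E, into B's slot:
 *   head [ A E C D ] -> next [ . . . . ]
 * so chains stay densely packed and a failed lookup can stop at the
 * first NULL pointer it finds.
 */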

/* call with b->lock held */
static inline
bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            void *q = b->pointers[i];

            if (unlikely(q == NULL)) {
                return false;
            }
            if (q == p) {
                qht_debug_assert(b->hashes[i] == hash);
                seqlock_write_begin(&head->sequence);
                qht_bucket_remove_entry(b, i);
                seqlock_write_end(&head->sequence);
                return true;
            }
        }
        b = b->next;
    } while (b);
    return false;
}

bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool ret;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    ret = qht_remove__locked(b, p, hash);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);
    return ret;
}

static inline void qht_bucket_iter(struct qht_bucket *head,
                                   const struct qht_iter *iter, void *userp)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                return;
            }
            switch (iter->type) {
            case QHT_ITER_VOID:
                iter->f.retvoid(b->pointers[i], b->hashes[i], userp);
                break;
            case QHT_ITER_RM:
                if (iter->f.retbool(b->pointers[i], b->hashes[i], userp)) {
                    /* replace i with the last valid element in the bucket */
                    seqlock_write_begin(&head->sequence);
                    qht_bucket_remove_entry(b, i);
                    seqlock_write_end(&head->sequence);
                    qht_bucket_debug__locked(b);
                    /* reevaluate i, since it just got replaced */
                    i--;
                    continue;
                }
                break;
            default:
                g_assert_not_reached();
            }
        }
        b = b->next;
    } while (b);
}

/* call with all of the map's locks held */
static inline void qht_map_iter__all_locked(struct qht_map *map,
                                            const struct qht_iter *iter,
                                            void *userp)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_iter(&map->buckets[i], iter, userp);
    }
}

static inline void
do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    qht_map_iter__all_locked(map, iter, userp);
    qht_map_unlock_buckets(map);
}

void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
{
    const struct qht_iter iter = {
        .f.retvoid = func,
        .type = QHT_ITER_VOID,
    };

    do_qht_iter(ht, &iter, userp);
}

void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp)
{
    const struct qht_iter iter = {
        .f.retbool = func,
        .type = QHT_ITER_RM,
    };

    do_qht_iter(ht, &iter, userp);
}
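
/*
 * Example callbacks -- a sketch; struct my_entry and the staleness test
 * are illustrative:
 *
 *   static void count_entry(void *p, uint32_t hash, void *userp)
 *   {
 *       size_t *count = userp;
 *
 *       (*count)++;
 *   }
 *
 *   static bool entry_is_stale(void *p, uint32_t hash, void *userp)
 *   {
 *       const struct my_entry *e = p;
 *
 *       return e->val == 0;   // true -> qht_iter_remove() drops the entry
 *   }
 *
 *   size_t count = 0;
 *   qht_iter(&ht, count_entry, &count);
 *   qht_iter_remove(&ht, entry_is_stale, NULL);
 */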

struct qht_map_copy_data {
    struct qht *ht;
    struct qht_map *new;
};

static void qht_map_copy(void *p, uint32_t hash, void *userp)
{
    struct qht_map_copy_data *data = userp;
    struct qht *ht = data->ht;
    struct qht_map *new = data->new;
    struct qht_bucket *b = qht_map_to_bucket(new, hash);

    /* no need to acquire b->lock because no thread has seen this map yet */
    qht_insert__locked(ht, new, b, p, hash, NULL);
}

/*
 * Atomically perform a resize and/or reset.
 * Call with ht->lock held.
 */
static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset)
{
    struct qht_map *old;
    const struct qht_iter iter = {
        .f.retvoid = qht_map_copy,
        .type = QHT_ITER_VOID,
    };
    struct qht_map_copy_data data;

    old = ht->map;
    qht_map_lock_buckets(old);

    if (reset) {
        qht_map_reset__all_locked(old);
    }

    if (new == NULL) {
        qht_map_unlock_buckets(old);
        return;
    }

    g_assert(new->n_buckets != old->n_buckets);
    data.ht = ht;
    data.new = new;
    qht_map_iter__all_locked(old, &iter, &data);
    qht_map_debug__all_locked(new);

    atomic_rcu_set(&ht->map, new);
    qht_map_unlock_buckets(old);
    call_rcu(old, qht_map_destroy, rcu);
}

bool qht_resize(struct qht *ht, size_t n_elems)
{
    size_t n_buckets = qht_elems_to_buckets(n_elems);
    bool ret = false;

    qht_lock(ht);
    if (n_buckets != ht->map->n_buckets) {
        struct qht_map *new;

        new = qht_map_create(n_buckets);
        qht_do_resize(ht, new);
        ret = true;
    }
    qht_unlock(ht);

    return ret;
}

/* pass @stats to qht_statistics_destroy() when done */
void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
{
    struct qht_map *map;
    int i;

    map = atomic_rcu_read(&ht->map);

    stats->used_head_buckets = 0;
    stats->entries = 0;
    qdist_init(&stats->chain);
    qdist_init(&stats->occupancy);
    /* bail out if the qht has not yet been initialized */
    if (unlikely(map == NULL)) {
        stats->head_buckets = 0;
        return;
    }
    stats->head_buckets = map->n_buckets;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *head = &map->buckets[i];
        struct qht_bucket *b;
        unsigned int version;
        size_t buckets;
        size_t entries;
        int j;

        do {
            version = seqlock_read_begin(&head->sequence);
            buckets = 0;
            entries = 0;
            b = head;
            do {
                for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
                    if (atomic_read(&b->pointers[j]) == NULL) {
                        break;
                    }
                    entries++;
                }
                buckets++;
                b = atomic_rcu_read(&b->next);
            } while (b);
        } while (seqlock_read_retry(&head->sequence, version));

        if (entries) {
            qdist_inc(&stats->chain, buckets);
            qdist_inc(&stats->occupancy,
                      (double)entries / QHT_BUCKET_ENTRIES / buckets);
            stats->used_head_buckets++;
            stats->entries += entries;
        } else {
            qdist_inc(&stats->occupancy, 0);
        }
    }
}

void qht_statistics_destroy(struct qht_stats *stats)
{
    qdist_destroy(&stats->occupancy);
    qdist_destroy(&stats->chain);
}
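
/*
 * Example -- a sketch; qht_report_stats() is illustrative and assumes
 * qdist_avg() from "qemu/qdist.h":
 *
 *   static void qht_report_stats(struct qht *ht)
 *   {
 *       struct qht_stats stats;
 *
 *       qht_statistics_init(ht, &stats);
 *       fprintf(stderr, "entries: %zu, used heads: %zu/%zu\n",
 *               stats.entries, stats.used_head_buckets, stats.head_buckets);
 *       fprintf(stderr, "avg chain: %.2f, avg occupancy: %.2f\n",
 *               qdist_avg(&stats.chain), qdist_avg(&stats.occupancy));
 *       qht_statistics_destroy(&stats);
 *   }
 */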