/*
 * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
 *
 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 *
 * Assumptions:
 * - NULL cannot be inserted/removed as a pointer value.
 * - Trying to insert an already-existing hash-pointer pair is OK. However,
 *   it is not OK to insert into the same hash table different hash-pointer
 *   pairs that have the same pointer value, but not the hashes.
 * - Lookups are performed under an RCU read-critical section; removals
 *   must wait for a grace period to elapse before freeing removed objects.
 *
 * Features:
 * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
 *   Lookups that are concurrent with writes to the same bucket will retry
 *   via a seqlock; iterators acquire all bucket locks and therefore can be
 *   concurrent with lookups and are serialized wrt writers.
 * - Writes (i.e. insertions/removals) can be concurrent with writes to
 *   different buckets; writes to the same bucket are serialized through a lock.
 * - Optional auto-resizing: the hash table resizes up if the load surpasses
 *   a certain threshold. Resizing is done concurrently with readers; writes
 *   are serialized with the resize operation.
 *
 * The key structure is the bucket, which is cacheline-sized. Buckets
 * contain a few hash values and pointers; the u32 hash values are stored in
 * full so that resizing is fast. Having this structure instead of directly
 * chaining items has two advantages:
 * - Failed lookups fail fast, and touch a minimum number of cache lines.
 * - Resizing the hash table with concurrent lookups is easy.
 *
 * There are two types of buckets:
 * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
 * 2. all "non-head" buckets (i.e. all others) are members of a chain that
 *    starts from a head bucket.
 * Note that the seqlock and spinlock of a head bucket apply to all buckets
 * chained to it; these two fields are unused in non-head buckets.
 *
 * On removals, we move the last valid item in the chain to the position of the
 * just-removed entry. This makes lookups slightly faster, since the moment an
 * invalid entry is found, the (failed) lookup is over.
 *
 * Resizing is done by taking all bucket spinlocks (so that no other writers can
 * race with us) and then copying all entries into a new hash map. Then, the
 * ht->map pointer is set, and the old map is freed once no RCU readers can see
 * it anymore.
 *
 * Writers check for concurrent resizes by comparing ht->map before and after
 * acquiring their bucket lock. If they don't match, a resize has occurred
 * while the bucket spinlock was being acquired.
 *
 * Related Work:
 * - Idea of cacheline-sized buckets with full hashes taken from:
 *   David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
 *   The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
 * - Why not RCU-based hash tables? They would allow us to get rid of the
 *   seqlock, but resizing would take forever since RCU read critical
 *   sections in QEMU take quite a long time.
 *   More info on relativistic hash tables:
 *   + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
 *     Tables via Relativistic Programming", USENIX ATC'11.
 *   + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
 *     https://lwn.net/Articles/612021/
 */
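/*
 * Example usage (a minimal sketch; "struct my_node", my_node_cmp() and
 * my_hash() are illustrative assumptions -- only the qht_*() and rcu_*()
 * calls are the real API):
 *
 *     static bool my_node_cmp(const void *a, const void *b)
 *     {
 *         const struct my_node *na = a;
 *         const struct my_node *nb = b;
 *
 *         return na->key == nb->key;
 *     }
 *
 *     struct qht ht;
 *     void *existing;
 *
 *     qht_init(&ht, my_node_cmp, 1024, QHT_MODE_AUTO_RESIZE);
 *     if (!qht_insert(&ht, node, my_hash(node->key), &existing)) {
 *         // an equivalent entry was already there; it is now in @existing
 *     }
 *     rcu_read_lock();
 *     struct my_node *found = qht_lookup(&ht, node, my_hash(node->key));
 *     rcu_read_unlock();
 *     qht_remove(&ht, node, my_hash(node->key));
 */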
#include "qemu/osdep.h"
#include "qemu/qht.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"

//#define QHT_DEBUG

/*
 * We want to avoid false sharing of cache lines. Most systems have 64-byte
 * cache lines so we go with it for simplicity.
 *
 * Note that systems with smaller cache lines will be fine (the struct is
 * almost 64 bytes); systems with larger cache lines might suffer from
 * some false sharing.
 */
#define QHT_BUCKET_ALIGN 64

/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
#if HOST_LONG_BITS == 32
#define QHT_BUCKET_ENTRIES 6
#else /* 64-bit */
#define QHT_BUCKET_ENTRIES 4
#endif

enum qht_iter_type {
    QHT_ITER_VOID,    /* do nothing; use retvoid */
    QHT_ITER_RM,      /* remove element if retbool returns true */
};

struct qht_iter {
    union {
        qht_iter_func_t retvoid;
        qht_iter_bool_func_t retbool;
    } f;
    enum qht_iter_type type;
};
/*
 * Do _not_ use qemu_mutex_[try]lock directly! Use these wrappers, otherwise
 * the profiler (QSP) will deadlock.
 */
static inline void qht_lock(struct qht *ht)
{
    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
        qemu_mutex_lock__raw(&ht->lock);
    } else {
        qemu_mutex_lock(&ht->lock);
    }
}

static inline int qht_trylock(struct qht *ht)
{
    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
        return qemu_mutex_trylock__raw(&(ht)->lock);
    }
    return qemu_mutex_trylock(&(ht)->lock);
}

/* this inline is not really necessary, but it helps keep code consistent */
static inline void qht_unlock(struct qht *ht)
{
    qemu_mutex_unlock(&ht->lock);
}

/*
 * Note: reading partially-updated pointers in @pointers could lead to
 * segfaults. We thus access them with atomic_read/set; this guarantees
 * that the compiler makes all those accesses atomic. We also need the
 * volatile-like behavior in atomic_read, since otherwise the compiler
 * might refetch the pointer.
 * atomic_read's are of course not necessary when the bucket lock is held.
 *
 * If both ht->lock and b->lock are grabbed, ht->lock should always
 * be grabbed first.
 */
struct qht_bucket {
    QemuSpin lock;
    QemuSeqLock sequence;
    uint32_t hashes[QHT_BUCKET_ENTRIES];
    void *pointers[QHT_BUCKET_ENTRIES];
    struct qht_bucket *next;
} QEMU_ALIGNED(QHT_BUCKET_ALIGN);

QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
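/*
 * Layout check, as a worked example (assuming 4-byte QemuSpin and
 * QemuSeqLock, each of which holds a single int/unsigned, and 8-byte
 * pointers on a 64-bit host):
 *
 *     4 (lock) + 4 (sequence) + 4 * 4 (hashes) + 4 * 8 (pointers) + 8 (next)
 *     = 64 bytes, i.e. exactly QHT_BUCKET_ALIGN.
 *
 * On a 32-bit host, 6 entries give 4 + 4 + 6 * 4 + 6 * 4 + 4 = 60 bytes,
 * which also fits; the QEMU_BUILD_BUG_ON above enforces this at build time.
 */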
/**
 * struct qht_map - structure to track an array of buckets
 * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
 *       find the whole struct.
 * @buckets: array of head buckets. It is constant once the map is created.
 * @n_buckets: number of head buckets. It is constant once the map is created.
 * @n_added_buckets: number of added (i.e. "non-head") buckets
 * @n_added_buckets_threshold: threshold to trigger an upward resize once the
 *                             number of added buckets surpasses it.
 *
 * Buckets are tracked in what we call a "map", i.e. this structure.
 */
struct qht_map {
    struct rcu_head rcu;
    struct qht_bucket *buckets;
    size_t n_buckets;
    size_t n_added_buckets;
    size_t n_added_buckets_threshold;
};

/* trigger a resize when n_added_buckets > n_buckets / div */
#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8

static void qht_do_resize_reset(struct qht *ht, struct qht_map *new,
                                bool reset);
static void qht_grow_maybe(struct qht *ht);

#ifdef QHT_DEBUG

#define qht_debug_assert(X) do { assert(X); } while (0)

static void qht_bucket_debug__locked(struct qht_bucket *b)
{
    bool seen_empty = false;
    bool corrupt = false;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                seen_empty = true;
                continue;
            }
            if (seen_empty) {
                fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
                        __func__, b, i, b->hashes[i], b->pointers[i]);
                corrupt = true;
            }
        }
        b = b->next;
    } while (b);
    qht_debug_assert(!corrupt);
}

static void qht_map_debug__all_locked(struct qht_map *map)
{
    int i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_debug__locked(&map->buckets[i]);
    }
}
#else

#define qht_debug_assert(X) do { (void)(X); } while (0)

static inline void qht_bucket_debug__locked(struct qht_bucket *b)
{ }

static inline void qht_map_debug__all_locked(struct qht_map *map)
{ }
#endif /* QHT_DEBUG */

static inline size_t qht_elems_to_buckets(size_t n_elems)
{
    return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
}

static inline void qht_head_init(struct qht_bucket *b)
{
    memset(b, 0, sizeof(*b));
    qemu_spin_init(&b->lock);
    seqlock_init(&b->sequence);
}

static inline
struct qht_bucket *qht_map_to_bucket(const struct qht_map *map, uint32_t hash)
{
    return &map->buckets[hash & (map->n_buckets - 1)];
}
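/*
 * Worked example (illustrative numbers, not from the code): with
 * QHT_BUCKET_ENTRIES == 4, sizing a table for n_elems == 1000 gives
 * pow2ceil(1000 / 4) == pow2ceil(250) == 256 head buckets. Because
 * qht_elems_to_buckets() always returns a power of two, the modulo in
 * qht_map_to_bucket() reduces to the cheap mask "hash & (n_buckets - 1)".
 */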
/* acquire all bucket locks from a map */
static void qht_map_lock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_lock(&b->lock);
    }
}

static void qht_map_unlock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_unlock(&b->lock);
    }
}

/*
 * Call with at least a bucket lock held.
 * @map should be the value read before acquiring the lock (or locks).
 */
static inline bool qht_map_is_stale__locked(struct qht *ht, struct qht_map *map)
{
    return map != ht->map;
}

/*
 * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
 *
 * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return;
    }
    qht_map_unlock_buckets(map);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qht_lock(ht);
    map = ht->map;
    qht_map_lock_buckets(map);
    qht_unlock(ht);
    *pmap = map;
}
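/*
 * The stale-map checks in qht_map_lock_buckets__no_stale() above and
 * qht_bucket_lock__no_stale() below guard against interleavings like the
 * following sketch (time flows downwards):
 *
 *     writer                               resizer
 *     ------                               -------
 *     map = atomic_rcu_read(&ht->map);
 *                                          qht_lock(ht);
 *                                          locks all buckets of map;
 *                                          atomic_rcu_set(&ht->map, new);
 *                                          unlocks all buckets of map;
 *     qht_map_lock_buckets(map);   <- stale map; any write would be lost
 *
 * Re-reading ht->map under ht->lock cannot race with a further resize,
 * since resizers hold ht->lock for the entire operation.
 */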
/*
 * Get a head bucket and lock it, making sure its parent map is not stale.
 * @pmap is filled with a pointer to the bucket's parent map.
 *
 * Unlock with qemu_spin_unlock(&b->lock).
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
                                             struct qht_map **pmap)
{
    struct qht_bucket *b;
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    qemu_spin_lock(&b->lock);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return b;
    }
    qemu_spin_unlock(&b->lock);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qht_lock(ht);
    map = ht->map;
    b = qht_map_to_bucket(map, hash);
    qemu_spin_lock(&b->lock);
    qht_unlock(ht);
    *pmap = map;
    return b;
}

static inline bool qht_map_needs_resize(struct qht_map *map)
{
    return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold;
}

static inline void qht_chain_destroy(struct qht_bucket *head)
{
    struct qht_bucket *curr = head->next;
    struct qht_bucket *prev;

    while (curr) {
        prev = curr;
        curr = curr->next;
        qemu_vfree(prev);
    }
}

/* pass only an orphan map */
static void qht_map_destroy(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_chain_destroy(&map->buckets[i]);
    }
    qemu_vfree(map->buckets);
    g_free(map);
}

static struct qht_map *qht_map_create(size_t n_buckets)
{
    struct qht_map *map;
    size_t i;

    map = g_malloc(sizeof(*map));
    map->n_buckets = n_buckets;

    map->n_added_buckets = 0;
    map->n_added_buckets_threshold = n_buckets /
                                     QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;

    /* let tiny hash tables add at least one non-head bucket */
    if (unlikely(map->n_added_buckets_threshold == 0)) {
        map->n_added_buckets_threshold = 1;
    }

    map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
                                 sizeof(*map->buckets) * n_buckets);
    for (i = 0; i < n_buckets; i++) {
        qht_head_init(&map->buckets[i]);
    }
    return map;
}
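/*
 * Worked example of the resize threshold (illustrative numbers): a map with
 * n_buckets == 1024 gets n_added_buckets_threshold == 1024 / 8 == 128, so an
 * auto-resizing table doubles its bucket count once more than 128 overflow
 * buckets have been chained, i.e. once roughly one in eight head buckets
 * has overflowed.
 */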
void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
              unsigned int mode)
{
    struct qht_map *map;
    size_t n_buckets = qht_elems_to_buckets(n_elems);

    g_assert(cmp);
    ht->cmp = cmp;
    ht->mode = mode;
    qemu_mutex_init(&ht->lock);
    map = qht_map_create(n_buckets);
    atomic_rcu_set(&ht->map, map);
}

/* call only when there are no readers/writers left */
void qht_destroy(struct qht *ht)
{
    qht_map_destroy(ht->map);
    memset(ht, 0, sizeof(*ht));
}

static void qht_bucket_reset__locked(struct qht_bucket *head)
{
    struct qht_bucket *b = head;
    int i;

    seqlock_write_begin(&head->sequence);
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                goto done;
            }
            atomic_set(&b->hashes[i], 0);
            atomic_set(&b->pointers[i], NULL);
        }
        b = b->next;
    } while (b);
 done:
    seqlock_write_end(&head->sequence);
}

/* call with all bucket locks held */
static void qht_map_reset__all_locked(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_reset__locked(&map->buckets[i]);
    }
    qht_map_debug__all_locked(map);
}

void qht_reset(struct qht *ht)
{
    struct qht_map *map;

    qht_map_lock_buckets__no_stale(ht, &map);
    qht_map_reset__all_locked(map);
    qht_map_unlock_buckets(map);
}
static inline void qht_do_resize(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, false);
}

static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, true);
}

bool qht_reset_size(struct qht *ht, size_t n_elems)
{
    struct qht_map *new = NULL;
    struct qht_map *map;
    size_t n_buckets;

    n_buckets = qht_elems_to_buckets(n_elems);

    qht_lock(ht);
    map = ht->map;
    if (n_buckets != map->n_buckets) {
        new = qht_map_create(n_buckets);
    }
    qht_do_resize_and_reset(ht, new);
    qht_unlock(ht);

    return !!new;
}

static inline
void *qht_do_lookup(const struct qht_bucket *head, qht_lookup_func_t func,
                    const void *userp, uint32_t hash)
{
    const struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (atomic_read(&b->hashes[i]) == hash) {
                /* The pointer is dereferenced before seqlock_read_retry,
                 * so (unlike qht_insert__locked) we need to use
                 * atomic_rcu_read here.
                 */
                void *p = atomic_rcu_read(&b->pointers[i]);

                if (likely(p) && likely(func(p, userp))) {
                    return p;
                }
            }
        }
        b = atomic_rcu_read(&b->next);
    } while (b);

    return NULL;
}

static __attribute__((noinline))
void *qht_lookup__slowpath(const struct qht_bucket *b, qht_lookup_func_t func,
                           const void *userp, uint32_t hash)
{
    unsigned int version;
    void *ret;

    do {
        version = seqlock_read_begin(&b->sequence);
        ret = qht_do_lookup(b, func, userp, hash);
    } while (seqlock_read_retry(&b->sequence, version));
    return ret;
}
void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash,
                        qht_lookup_func_t func)
{
    const struct qht_bucket *b;
    const struct qht_map *map;
    unsigned int version;
    void *ret;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    version = seqlock_read_begin(&b->sequence);
    ret = qht_do_lookup(b, func, userp, hash);
    if (likely(!seqlock_read_retry(&b->sequence, version))) {
        return ret;
    }
    /*
     * Removing the do/while from the fastpath gives a 4% perf. increase when
     * running a 100%-lookup microbenchmark.
     */
    return qht_lookup__slowpath(b, func, userp, hash);
}

void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash)
{
    return qht_lookup_custom(ht, userp, hash, ht->cmp);
}
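/*
 * Example: looking up by key rather than by candidate object (a sketch;
 * "struct my_node", my_key_cmp() and my_hash() are illustrative
 * assumptions). The lookup function receives each candidate entry plus the
 * opaque @userp, so @userp does not have to be an entry itself:
 *
 *     static bool my_key_cmp(const void *obj, const void *userp)
 *     {
 *         const struct my_node *node = obj;
 *         const uint64_t *key = userp;
 *
 *         return node->key == *key;
 *     }
 *
 *     uint64_t key = ...;
 *     uint32_t hash = my_hash(key);    // same hash used at insertion time
 *     struct my_node *node = qht_lookup_custom(ht, &key, hash, my_key_cmp);
 */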
/* call with head->lock held */
static void *qht_insert__locked(struct qht *ht, struct qht_map *map,
                                struct qht_bucket *head, void *p, uint32_t hash,
                                bool *needs_resize)
{
    struct qht_bucket *b = head;
    struct qht_bucket *prev = NULL;
    struct qht_bucket *new = NULL;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                if (unlikely(b->hashes[i] == hash &&
                             ht->cmp(b->pointers[i], p))) {
                    return b->pointers[i];
                }
            } else {
                goto found;
            }
        }
        prev = b;
        b = b->next;
    } while (b);

    b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
    memset(b, 0, sizeof(*b));
    new = b;
    i = 0;
    atomic_inc(&map->n_added_buckets);
    if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
        *needs_resize = true;
    }

 found:
    /* found an empty slot: acquire the seqlock and write */
    seqlock_write_begin(&head->sequence);
    if (new) {
        atomic_rcu_set(&prev->next, b);
    }
    /* smp_wmb() implicit in seqlock_write_begin. */
    atomic_set(&b->hashes[i], hash);
    atomic_set(&b->pointers[i], p);
    seqlock_write_end(&head->sequence);
    return NULL;
}

static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
{
    struct qht_map *map;

    /*
     * If the lock is taken it probably means there's an ongoing resize,
     * so bail out.
     */
    if (qht_trylock(ht)) {
        return;
    }
    map = ht->map;
    /* another thread might have just performed the resize we were after */
    if (qht_map_needs_resize(map)) {
        struct qht_map *new = qht_map_create(map->n_buckets * 2);

        qht_do_resize(ht, new);
    }
    qht_unlock(ht);
}

bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool needs_resize = false;
    void *prev;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);

    if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
        qht_grow_maybe(ht);
    }
    if (likely(prev == NULL)) {
        return true;
    }
    if (existing) {
        *existing = prev;
    }
    return false;
}
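/*
 * Example of handling a lost insertion race (a sketch; "node" and g_free()
 * as its destructor are illustrative assumptions). When two threads insert
 * equivalent entries concurrently, exactly one wins; the loser gets the
 * winner's entry back via @existing:
 *
 *     void *existing;
 *
 *     if (!qht_insert(ht, node, hash, &existing)) {
 *         g_free(node);        // discard ours...
 *         node = existing;     // ...and use the entry that won the race
 *     }
 */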
static inline bool qht_entry_is_last(struct qht_bucket *b, int pos)
{
    if (pos == QHT_BUCKET_ENTRIES - 1) {
        if (b->next == NULL) {
            return true;
        }
        return b->next->pointers[0] == NULL;
    }
    return b->pointers[pos + 1] == NULL;
}

static void
qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
{
    qht_debug_assert(!(to == from && i == j));
    qht_debug_assert(to->pointers[i]);
    qht_debug_assert(from->pointers[j]);

    atomic_set(&to->hashes[i], from->hashes[j]);
    atomic_set(&to->pointers[i], from->pointers[j]);

    atomic_set(&from->hashes[j], 0);
    atomic_set(&from->pointers[j], NULL);
}

/*
 * Find the last valid entry in @orig, and swap it with @orig[pos], which has
 * just been invalidated.
 */
static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
{
    struct qht_bucket *b = orig;
    struct qht_bucket *prev = NULL;
    int i;

    if (qht_entry_is_last(orig, pos)) {
        orig->hashes[pos] = 0;
        atomic_set(&orig->pointers[pos], NULL);
        return;
    }
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                continue;
            }
            if (i > 0) {
                return qht_entry_move(orig, pos, b, i - 1);
            }
            qht_debug_assert(prev);
            return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
        }
        prev = b;
        b = b->next;
    } while (b);
    /* no free entries other than orig[pos], so swap it with the last one */
    qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
}
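/*
 * Worked example of the compaction above (entry letters are illustrative;
 * "_" denotes an empty slot). With QHT_BUCKET_ENTRIES == 4 and the chain
 *
 *     head: [A B C D] -> next: [E F _ _]
 *
 * removing B moves F, the last valid entry, into B's slot:
 *
 *     head: [A F C D] -> next: [E _ _ _]
 *
 * This keeps all valid entries packed at the front, so lookups can stop at
 * the first NULL pointer they see.
 */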
/* call with b->lock held */
static inline
bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            void *q = b->pointers[i];

            if (unlikely(q == NULL)) {
                return false;
            }
            if (q == p) {
                qht_debug_assert(b->hashes[i] == hash);
                seqlock_write_begin(&head->sequence);
                qht_bucket_remove_entry(b, i);
                seqlock_write_end(&head->sequence);
                return true;
            }
        }
        b = b->next;
    } while (b);
    return false;
}

bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool ret;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    ret = qht_remove__locked(b, p, hash);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);
    return ret;
}

static inline void qht_bucket_iter(struct qht_bucket *head,
                                   const struct qht_iter *iter, void *userp)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                return;
            }
            switch (iter->type) {
            case QHT_ITER_VOID:
                iter->f.retvoid(b->pointers[i], b->hashes[i], userp);
                break;
            case QHT_ITER_RM:
                if (iter->f.retbool(b->pointers[i], b->hashes[i], userp)) {
                    /* replace i with the last valid element in the bucket */
                    seqlock_write_begin(&head->sequence);
                    qht_bucket_remove_entry(b, i);
                    seqlock_write_end(&head->sequence);
                    qht_bucket_debug__locked(b);
                    /* reevaluate i, since it just got replaced */
                    i--;
                    continue;
                }
                break;
            default:
                g_assert_not_reached();
            }
        }
        b = b->next;
    } while (b);
}

/* call with all of the map's locks held */
static inline void qht_map_iter__all_locked(struct qht_map *map,
                                            const struct qht_iter *iter,
                                            void *userp)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_iter(&map->buckets[i], iter, userp);
    }
}

static inline void
do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    qht_map_iter__all_locked(map, iter, userp);
    qht_map_unlock_buckets(map);
}

void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
{
    const struct qht_iter iter = {
        .f.retvoid = func,
        .type = QHT_ITER_VOID,
    };

    do_qht_iter(ht, &iter, userp);
}

void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp)
{
    const struct qht_iter iter = {
        .f.retbool = func,
        .type = QHT_ITER_RM,
    };

    do_qht_iter(ht, &iter, userp);
}
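/*
 * Example iterator callbacks (a sketch; "struct my_node", its "generation"
 * field and the counter are illustrative assumptions):
 *
 *     static void count_entry(void *p, uint32_t hash, void *userp)
 *     {
 *         size_t *count = userp;
 *
 *         (*count)++;
 *     }
 *
 *     static bool is_stale(void *p, uint32_t hash, void *userp)
 *     {
 *         const struct my_node *node = p;
 *         const unsigned *generation = userp;
 *
 *         return node->generation != *generation;
 *     }
 *
 *     size_t count = 0;
 *     unsigned generation = ...;
 *
 *     qht_iter(ht, count_entry, &count);
 *     qht_iter_remove(ht, is_stale, &generation);
 *
 * Note that both walks hold all bucket locks, so the callbacks must not call
 * back into qht_insert()/qht_remove() on the same table; the spinlocks are
 * not recursive.
 */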
struct qht_map_copy_data {
    struct qht *ht;
    struct qht_map *new;
};

static void qht_map_copy(void *p, uint32_t hash, void *userp)
{
    struct qht_map_copy_data *data = userp;
    struct qht *ht = data->ht;
    struct qht_map *new = data->new;
    struct qht_bucket *b = qht_map_to_bucket(new, hash);

    /* no need to acquire b->lock because no thread has seen this map yet */
    qht_insert__locked(ht, new, b, p, hash, NULL);
}

/*
 * Atomically perform a resize and/or reset.
 * Call with ht->lock held.
 */
static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset)
{
    struct qht_map *old;
    const struct qht_iter iter = {
        .f.retvoid = qht_map_copy,
        .type = QHT_ITER_VOID,
    };
    struct qht_map_copy_data data;

    old = ht->map;
    qht_map_lock_buckets(old);

    if (reset) {
        qht_map_reset__all_locked(old);
    }

    if (new == NULL) {
        qht_map_unlock_buckets(old);
        return;
    }

    g_assert(new->n_buckets != old->n_buckets);
    data.ht = ht;
    data.new = new;
    qht_map_iter__all_locked(old, &iter, &data);
    qht_map_debug__all_locked(new);

    atomic_rcu_set(&ht->map, new);
    qht_map_unlock_buckets(old);
    call_rcu(old, qht_map_destroy, rcu);
}

bool qht_resize(struct qht *ht, size_t n_elems)
{
    size_t n_buckets = qht_elems_to_buckets(n_elems);
    bool ret = false;

    qht_lock(ht);
    if (n_buckets != ht->map->n_buckets) {
        struct qht_map *new;

        new = qht_map_create(n_buckets);
        qht_do_resize(ht, new);
        ret = true;
    }
    qht_unlock(ht);

    return ret;
}
/* pass @stats to qht_statistics_destroy() when done */
void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
{
    struct qht_map *map;
    size_t i;

    map = atomic_rcu_read(&ht->map);

    stats->used_head_buckets = 0;
    stats->entries = 0;
    qdist_init(&stats->chain);
    qdist_init(&stats->occupancy);
    /* bail out if the qht has not yet been initialized */
    if (unlikely(map == NULL)) {
        stats->head_buckets = 0;
        return;
    }
    stats->head_buckets = map->n_buckets;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *head = &map->buckets[i];
        struct qht_bucket *b;
        unsigned int version;
        size_t buckets;
        size_t entries;
        int j;

        do {
            version = seqlock_read_begin(&head->sequence);
            buckets = 0;
            entries = 0;
            b = head;
            do {
                for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
                    if (atomic_read(&b->pointers[j]) == NULL) {
                        break;
                    }
                    entries++;
                }
                buckets++;
                b = atomic_rcu_read(&b->next);
            } while (b);
        } while (seqlock_read_retry(&head->sequence, version));

        if (entries) {
            qdist_inc(&stats->chain, buckets);
            qdist_inc(&stats->occupancy,
                      (double)entries / QHT_BUCKET_ENTRIES / buckets);
            stats->used_head_buckets++;
            stats->entries += entries;
        } else {
            qdist_inc(&stats->occupancy, 0);
        }
    }
}

void qht_statistics_destroy(struct qht_stats *stats)
{
    qdist_destroy(&stats->occupancy);
    qdist_destroy(&stats->chain);
}
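/*
 * Example of consuming the statistics (a sketch; qdist_avg() is part of the
 * qdist API used above):
 *
 *     struct qht_stats stats;
 *
 *     qht_statistics_init(ht, &stats);
 *     fprintf(stderr, "entries: %zu, used heads: %zu/%zu, avg chain: %f\n",
 *             stats.entries, stats.used_head_buckets, stats.head_buckets,
 *             qdist_avg(&stats.chain));
 *     qht_statistics_destroy(&stats);
 */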