/*
 * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
 *
 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 *
 * Assumptions:
 * - NULL cannot be inserted/removed as a pointer value.
 * - Trying to insert an already-existing hash-pointer pair is OK. However,
 *   it is not OK to insert into the same hash table different hash-pointer
 *   pairs that have the same pointer value, but not the hashes.
 * - Lookups are performed under an RCU read-critical section; removals
 *   must wait for a grace period to elapse before freeing removed objects.
 *
 * Features:
 * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
 *   Lookups that are concurrent with writes to the same bucket will retry
 *   via a seqlock; iterators acquire all bucket locks and therefore can be
 *   concurrent with lookups and are serialized wrt writers.
 * - Writes (i.e. insertions/removals) can be concurrent with writes to
 *   different buckets; writes to the same bucket are serialized through a lock.
 * - Optional auto-resizing: the hash table resizes up if the load surpasses
 *   a certain threshold. Resizing is done concurrently with readers; writes
 *   are serialized with the resize operation.
 *
 * The key structure is the bucket, which is cacheline-sized. Buckets
 * contain a few hash values and pointers; the u32 hash values are stored in
 * full so that resizing is fast. Having this structure instead of directly
 * chaining items has two advantages:
 * - Failed lookups fail fast, and touch a minimum number of cache lines.
 * - Resizing the hash table with concurrent lookups is easy.
 *
 * There are two types of buckets:
 * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
 * 2. all "non-head" buckets (i.e. all others) are members of a chain that
 *    starts from a head bucket.
 * Note that the seqlock and spinlock of a head bucket apply to all buckets
 * chained to it; these two fields are unused in non-head buckets.
 *
 * On removals, we move the last valid item in the chain to the position of the
 * just-removed entry. This makes lookups slightly faster, since the moment an
 * invalid entry is found, the (failed) lookup is over.
 *
 * Resizing is done by taking all bucket spinlocks (so that no other writers can
 * race with us) and then copying all entries into a new hash map. Then, the
 * ht->map pointer is set, and the old map is freed once no RCU readers can see
 * it anymore.
 *
 * Writers check for concurrent resizes by comparing ht->map before and after
 * acquiring their bucket lock. If they don't match, a resize has occurred
 * while the bucket spinlock was being acquired.
 *
 * Related Work:
 * - Idea of cacheline-sized buckets with full hashes taken from:
 *   David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
 *   The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
 * - Why not RCU-based hash tables? They would allow us to get rid of the
 *   seqlock, but resizing would take forever since RCU read critical
 *   sections in QEMU take quite a long time.
 *   More info on relativistic hash tables:
 *   + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
 *     Tables via Relativistic Programming", USENIX ATC'11.
 *   + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
 *     https://lwn.net/Articles/612021/
 */
#include "qemu/osdep.h"
#include "qemu/qht.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"

//#define QHT_DEBUG

/*
 * We want to avoid false sharing of cache lines. Most systems have 64-byte
 * cache lines so we go with it for simplicity.
 *
 * Note that systems with smaller cache lines will be fine (the struct is
 * almost 64 bytes); systems with larger cache lines might suffer from
 * some false sharing.
 */
#define QHT_BUCKET_ALIGN 64

/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
#if HOST_LONG_BITS == 32
#define QHT_BUCKET_ENTRIES 6
#else /* 64-bit */
#define QHT_BUCKET_ENTRIES 4
#endif

enum qht_iter_type {
    QHT_ITER_VOID,    /* do nothing; use retvoid */
    QHT_ITER_RM,      /* remove element if retbool returns true */
};

struct qht_iter {
    union {
        qht_iter_func_t retvoid;
        qht_iter_bool_func_t retbool;
    } f;
    enum qht_iter_type type;
};

/*
 * Do _not_ use qemu_mutex_[try]lock directly! Use these macros, otherwise
 * the profiler (QSP) will deadlock.
 */
static inline void qht_lock(struct qht *ht)
{
    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
        qemu_mutex_lock__raw(&ht->lock);
    } else {
        qemu_mutex_lock(&ht->lock);
    }
}

static inline int qht_trylock(struct qht *ht)
{
    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
        return qemu_mutex_trylock__raw(&(ht)->lock);
    }
    return qemu_mutex_trylock(&(ht)->lock);
}

/* this inline is not really necessary, but it helps keep code consistent */
static inline void qht_unlock(struct qht *ht)
{
    qemu_mutex_unlock(&ht->lock);
}

/*
 * Note: reading partially-updated pointers in @pointers could lead to
 * segfaults. We thus access them with atomic_read/set; this guarantees
 * that the compiler makes all those accesses atomic. We also need the
 * volatile-like behavior in atomic_read, since otherwise the compiler
 * might refetch the pointer.
 * atomic_read's are of course not necessary when the bucket lock is held.
 *
 * If both ht->lock and b->lock are grabbed, ht->lock should always
 * be grabbed first.
 */
struct qht_bucket {
    QemuSpin lock;
    QemuSeqLock sequence;
    uint32_t hashes[QHT_BUCKET_ENTRIES];
    void *pointers[QHT_BUCKET_ENTRIES];
    struct qht_bucket *next;
} QEMU_ALIGNED(QHT_BUCKET_ALIGN);

QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);

/**
 * struct qht_map - structure to track an array of buckets
 * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
 *       find the whole struct.
 * @buckets: array of head buckets. It is constant once the map is created.
 * @n_buckets: number of head buckets. It is constant once the map is created.
 * @n_added_buckets: number of added (i.e. "non-head") buckets
 * @n_added_buckets_threshold: threshold to trigger an upward resize once the
 *                             number of added buckets surpasses it.
 *
 * Buckets are tracked in what we call a "map", i.e. this structure.
 */
struct qht_map {
    struct rcu_head rcu;
    struct qht_bucket *buckets;
    size_t n_buckets;
    size_t n_added_buckets;
    size_t n_added_buckets_threshold;
};

/* trigger a resize when n_added_buckets > n_buckets / div */
#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8

static void qht_do_resize_reset(struct qht *ht, struct qht_map *new,
                                bool reset);
static void qht_grow_maybe(struct qht *ht);

#ifdef QHT_DEBUG

#define qht_debug_assert(X) do { assert(X); } while (0)

static void qht_bucket_debug__locked(struct qht_bucket *b)
{
    bool seen_empty = false;
    bool corrupt = false;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                seen_empty = true;
                continue;
            }
            if (seen_empty) {
                fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
                        __func__, b, i, b->hashes[i], b->pointers[i]);
                corrupt = true;
            }
        }
        b = b->next;
    } while (b);
    qht_debug_assert(!corrupt);
}

static void qht_map_debug__all_locked(struct qht_map *map)
{
    int i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_debug__locked(&map->buckets[i]);
    }
}
#else

#define qht_debug_assert(X) do { (void)(X); } while (0)

static inline void qht_bucket_debug__locked(struct qht_bucket *b)
{ }

static inline void qht_map_debug__all_locked(struct qht_map *map)
{ }
#endif /* QHT_DEBUG */

static inline size_t qht_elems_to_buckets(size_t n_elems)
{
    return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
}

static inline void qht_head_init(struct qht_bucket *b)
{
    memset(b, 0, sizeof(*b));
    qemu_spin_init(&b->lock);
    seqlock_init(&b->sequence);
}

static inline
struct qht_bucket *qht_map_to_bucket(struct qht_map *map, uint32_t hash)
{
    return &map->buckets[hash & (map->n_buckets - 1)];
}

/* acquire all bucket locks from a map */
static void qht_map_lock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_lock(&b->lock);
    }
}

static void qht_map_unlock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_unlock(&b->lock);
    }
}

/*
 * Call with at least a bucket lock held.
 * @map should be the value read before acquiring the lock (or locks).
 */
static inline bool qht_map_is_stale__locked(struct qht *ht, struct qht_map *map)
{
    return map != ht->map;
}

/*
 * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
 *
 * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return;
    }
    qht_map_unlock_buckets(map);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qht_lock(ht);
    map = ht->map;
    qht_map_lock_buckets(map);
    qht_unlock(ht);
    *pmap = map;
    return;
}

/*
 * Get a head bucket and lock it, making sure its parent map is not stale.
 * @pmap is filled with a pointer to the bucket's parent map.
 *
 * Unlock with qemu_spin_unlock(&b->lock).
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
                                             struct qht_map **pmap)
{
    struct qht_bucket *b;
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    qemu_spin_lock(&b->lock);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return b;
    }
    qemu_spin_unlock(&b->lock);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qht_lock(ht);
    map = ht->map;
    b = qht_map_to_bucket(map, hash);
    qemu_spin_lock(&b->lock);
    qht_unlock(ht);
    *pmap = map;
    return b;
}

static inline bool qht_map_needs_resize(struct qht_map *map)
{
    return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold;
}

static inline void qht_chain_destroy(struct qht_bucket *head)
{
    struct qht_bucket *curr = head->next;
    struct qht_bucket *prev;

    while (curr) {
        prev = curr;
        curr = curr->next;
        qemu_vfree(prev);
    }
}

/* pass only an orphan map */
static void qht_map_destroy(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_chain_destroy(&map->buckets[i]);
    }
    qemu_vfree(map->buckets);
    g_free(map);
}

static struct qht_map *qht_map_create(size_t n_buckets)
{
    struct qht_map *map;
    size_t i;

    map = g_malloc(sizeof(*map));
    map->n_buckets = n_buckets;

    map->n_added_buckets = 0;
    map->n_added_buckets_threshold = n_buckets /
        QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;

    /* let tiny hash tables add at least one non-head bucket */
    if (unlikely(map->n_added_buckets_threshold == 0)) {
        map->n_added_buckets_threshold = 1;
    }

    map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
                                 sizeof(*map->buckets) * n_buckets);
    for (i = 0; i < n_buckets; i++) {
        qht_head_init(&map->buckets[i]);
    }
    return map;
}

void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
              unsigned int mode)
{
    struct qht_map *map;
    size_t n_buckets = qht_elems_to_buckets(n_elems);

    g_assert(cmp);
    ht->cmp = cmp;
    ht->mode = mode;
    qemu_mutex_init(&ht->lock);
    map = qht_map_create(n_buckets);
    atomic_rcu_set(&ht->map, map);
}

/* call only when there are no readers/writers left */
void qht_destroy(struct qht *ht)
{
    qht_map_destroy(ht->map);
    memset(ht, 0, sizeof(*ht));
}

static void qht_bucket_reset__locked(struct qht_bucket *head)
{
    struct qht_bucket *b = head;
    int i;

    seqlock_write_begin(&head->sequence);
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
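            /* valid entries are packed at the front, so a NULL ends the chain */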
            if (b->pointers[i] == NULL) {
                goto done;
            }
            atomic_set(&b->hashes[i], 0);
            atomic_set(&b->pointers[i], NULL);
        }
        b = b->next;
    } while (b);
 done:
    seqlock_write_end(&head->sequence);
}

/* call with all bucket locks held */
static void qht_map_reset__all_locked(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_reset__locked(&map->buckets[i]);
    }
    qht_map_debug__all_locked(map);
}

void qht_reset(struct qht *ht)
{
    struct qht_map *map;

    qht_map_lock_buckets__no_stale(ht, &map);
    qht_map_reset__all_locked(map);
    qht_map_unlock_buckets(map);
}

static inline void qht_do_resize(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, false);
}

static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, true);
}

bool qht_reset_size(struct qht *ht, size_t n_elems)
{
    struct qht_map *new = NULL;
    struct qht_map *map;
    size_t n_buckets;

    n_buckets = qht_elems_to_buckets(n_elems);

    qht_lock(ht);
    map = ht->map;
    if (n_buckets != map->n_buckets) {
        new = qht_map_create(n_buckets);
    }
    qht_do_resize_and_reset(ht, new);
    qht_unlock(ht);

    return !!new;
}

static inline
void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func,
                    const void *userp, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (atomic_read(&b->hashes[i]) == hash) {
                /* The pointer is dereferenced before seqlock_read_retry,
                 * so (unlike qht_insert__locked) we need to use
                 * atomic_rcu_read here.
                 */
                void *p = atomic_rcu_read(&b->pointers[i]);

                if (likely(p) && likely(func(p, userp))) {
                    return p;
                }
            }
        }
        b = atomic_rcu_read(&b->next);
    } while (b);

    return NULL;
}

static __attribute__((noinline))
void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func,
                           const void *userp, uint32_t hash)
{
    unsigned int version;
    void *ret;

    do {
        version = seqlock_read_begin(&b->sequence);
        ret = qht_do_lookup(b, func, userp, hash);
    } while (seqlock_read_retry(&b->sequence, version));
    return ret;
}

void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash,
                        qht_lookup_func_t func)
{
    struct qht_bucket *b;
    struct qht_map *map;
    unsigned int version;
    void *ret;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    version = seqlock_read_begin(&b->sequence);
    ret = qht_do_lookup(b, func, userp, hash);
    if (likely(!seqlock_read_retry(&b->sequence, version))) {
        return ret;
    }
    /*
     * Removing the do/while from the fastpath gives a 4% perf. increase when
     * running a 100%-lookup microbenchmark.
     */
    return qht_lookup__slowpath(b, func, userp, hash);
}

void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash)
{
    return qht_lookup_custom(ht, userp, hash, ht->cmp);
}

/* call with head->lock held */
static void *qht_insert__locked(struct qht *ht, struct qht_map *map,
                                struct qht_bucket *head, void *p, uint32_t hash,
                                bool *needs_resize)
{
    struct qht_bucket *b = head;
    struct qht_bucket *prev = NULL;
    struct qht_bucket *new = NULL;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                if (unlikely(b->hashes[i] == hash &&
                             ht->cmp(b->pointers[i], p))) {
                    return b->pointers[i];
                }
            } else {
                goto found;
            }
        }
        prev = b;
        b = b->next;
    } while (b);

    b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
    memset(b, 0, sizeof(*b));
    new = b;
    i = 0;
    atomic_inc(&map->n_added_buckets);
    if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
        *needs_resize = true;
    }

 found:
    /* found an empty key: acquire the seqlock and write */
    seqlock_write_begin(&head->sequence);
    if (new) {
        atomic_rcu_set(&prev->next, b);
    }
    /* smp_wmb() implicit in seqlock_write_begin. */
    atomic_set(&b->hashes[i], hash);
    atomic_set(&b->pointers[i], p);
    seqlock_write_end(&head->sequence);
    return NULL;
}

static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
{
    struct qht_map *map;

    /*
     * If the lock is taken it probably means there's an ongoing resize,
     * so bail out.
     */
    if (qht_trylock(ht)) {
        return;
    }
    map = ht->map;
    /* another thread might have just performed the resize we were after */
    if (qht_map_needs_resize(map)) {
        struct qht_map *new = qht_map_create(map->n_buckets * 2);

        qht_do_resize(ht, new);
    }
    qht_unlock(ht);
}

bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool needs_resize = false;
    void *prev;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);

    if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
        qht_grow_maybe(ht);
    }
    if (likely(prev == NULL)) {
        return true;
    }
    if (existing) {
        *existing = prev;
    }
    return false;
}

static inline bool qht_entry_is_last(struct qht_bucket *b, int pos)
{
    if (pos == QHT_BUCKET_ENTRIES - 1) {
        if (b->next == NULL) {
            return true;
        }
        return b->next->pointers[0] == NULL;
    }
    return b->pointers[pos + 1] == NULL;
}

static void
qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
{
    qht_debug_assert(!(to == from && i == j));
    qht_debug_assert(to->pointers[i]);
    qht_debug_assert(from->pointers[j]);

    atomic_set(&to->hashes[i], from->hashes[j]);
    atomic_set(&to->pointers[i], from->pointers[j]);

    atomic_set(&from->hashes[j], 0);
    atomic_set(&from->pointers[j], NULL);
}

/*
 * Find the last valid entry in @head, and swap it with @orig[pos], which has
 * just been invalidated.
 */
static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
{
    struct qht_bucket *b = orig;
    struct qht_bucket *prev = NULL;
    int i;

    if (qht_entry_is_last(orig, pos)) {
        orig->hashes[pos] = 0;
        atomic_set(&orig->pointers[pos], NULL);
        return;
    }
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                continue;
            }
            if (i > 0) {
                return qht_entry_move(orig, pos, b, i - 1);
            }
            qht_debug_assert(prev);
            return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
        }
        prev = b;
        b = b->next;
    } while (b);
    /* no free entries other than orig[pos], so swap it with the last one */
    qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
}

/* call with b->lock held */
static inline
bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            void *q = b->pointers[i];

            if (unlikely(q == NULL)) {
                return false;
            }
            if (q == p) {
                qht_debug_assert(b->hashes[i] == hash);
                seqlock_write_begin(&head->sequence);
                qht_bucket_remove_entry(b, i);
                seqlock_write_end(&head->sequence);
                return true;
            }
        }
        b = b->next;
    } while (b);
    return false;
}

bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool ret;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    ret = qht_remove__locked(b, p, hash);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);
    return ret;
}

static inline void qht_bucket_iter(struct qht *ht, struct qht_bucket *head,
                                   const struct qht_iter *iter, void *userp)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                return;
            }
            switch (iter->type) {
            case QHT_ITER_VOID:
                iter->f.retvoid(ht, b->pointers[i], b->hashes[i], userp);
                break;
            case QHT_ITER_RM:
                if (iter->f.retbool(ht, b->pointers[i], b->hashes[i], userp)) {
                    /* replace i with the last valid element in the bucket */
                    seqlock_write_begin(&head->sequence);
                    qht_bucket_remove_entry(b, i);
                    seqlock_write_end(&head->sequence);
                    qht_bucket_debug__locked(b);
                    /* reevaluate i, since it just got replaced */
                    i--;
                    continue;
                }
                break;
            default:
                g_assert_not_reached();
            }
        }
        b = b->next;
    } while (b);
}

/* call with all of the map's locks held */
static inline void qht_map_iter__all_locked(struct qht *ht, struct qht_map *map,
                                            const struct qht_iter *iter,
                                            void *userp)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_iter(ht, &map->buckets[i], iter, userp);
    }
}

static inline void
do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    /* Note: ht here is merely for carrying ht->mode; ht->map won't be read */
    qht_map_iter__all_locked(ht, map, iter, userp);
    qht_map_unlock_buckets(map);
}

void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
{
    const struct qht_iter iter = {
        .f.retvoid = func,
        .type = QHT_ITER_VOID,
    };

    do_qht_iter(ht, &iter, userp);
}

void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp)
{
    const struct qht_iter iter = {
        .f.retbool = func,
        .type = QHT_ITER_RM,
    };

    do_qht_iter(ht, &iter, userp);
}

static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp)
{
    struct qht_map *new = userp;
    struct qht_bucket *b = qht_map_to_bucket(new, hash);

    /* no need to acquire b->lock because no thread has seen this map yet */
    qht_insert__locked(ht, new, b, p, hash, NULL);
}

/*
 * Atomically perform a resize and/or reset.
 * Call with ht->lock held.
 */
static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset)
{
    struct qht_map *old;
    const struct qht_iter iter = {
        .f.retvoid = qht_map_copy,
        .type = QHT_ITER_VOID,
    };

    old = ht->map;
    qht_map_lock_buckets(old);

    if (reset) {
        qht_map_reset__all_locked(old);
    }

    if (new == NULL) {
        qht_map_unlock_buckets(old);
        return;
    }

    g_assert(new->n_buckets != old->n_buckets);
    qht_map_iter__all_locked(ht, old, &iter, new);
    qht_map_debug__all_locked(new);

    atomic_rcu_set(&ht->map, new);
    qht_map_unlock_buckets(old);
    call_rcu(old, qht_map_destroy, rcu);
}

bool qht_resize(struct qht *ht, size_t n_elems)
{
    size_t n_buckets = qht_elems_to_buckets(n_elems);
    bool ret = false;

    qht_lock(ht);
    if (n_buckets != ht->map->n_buckets) {
        struct qht_map *new;

        new = qht_map_create(n_buckets);
        qht_do_resize(ht, new);
        ret = true;
    }
    qht_unlock(ht);

    return ret;
}

/* pass @stats to qht_statistics_destroy() when done */
void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
{
    struct qht_map *map;
    int i;

    map = atomic_rcu_read(&ht->map);

    stats->used_head_buckets = 0;
    stats->entries = 0;
    qdist_init(&stats->chain);
    qdist_init(&stats->occupancy);
    /* bail out if the qht has not yet been initialized */
    if (unlikely(map == NULL)) {
        stats->head_buckets = 0;
        return;
    }
    stats->head_buckets = map->n_buckets;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *head = &map->buckets[i];
        struct qht_bucket *b;
        unsigned int version;
        size_t buckets;
        size_t entries;
        int j;

        do {
            version = seqlock_read_begin(&head->sequence);
            buckets = 0;
            entries = 0;
            b = head;
            do {
                for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
                    if (atomic_read(&b->pointers[j]) == NULL) {
                        break;
                    }
                    entries++;
                }
                buckets++;
                b = atomic_rcu_read(&b->next);
            } while (b);
        } while (seqlock_read_retry(&head->sequence, version));

        if (entries) {
            qdist_inc(&stats->chain, buckets);
            qdist_inc(&stats->occupancy,
                      (double)entries / QHT_BUCKET_ENTRIES / buckets);
            stats->used_head_buckets++;
            stats->entries += entries;
        } else {
            qdist_inc(&stats->occupancy, 0);
        }
    }
}

void qht_statistics_destroy(struct qht_stats *stats)
{
    qdist_destroy(&stats->occupancy);
    qdist_destroy(&stats->chain);
}
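
/*
 * Usage sketch (illustrative only; my_cmp(), my_hash() and obj are
 * hypothetical stand-ins for a caller's comparison function, hash
 * function and object):
 *
 *     static bool my_cmp(const void *a, const void *b)
 *     {
 *         return a == b;                     // e.g. compare by identity
 *     }
 *
 *     struct qht ht;
 *     void *existing;
 *
 *     qht_init(&ht, my_cmp, 1 << 16, QHT_MODE_AUTO_RESIZE);
 *     qht_insert(&ht, obj, my_hash(obj), &existing);
 *
 *     rcu_read_lock();                       // lookups run under RCU
 *     void *p = qht_lookup(&ht, obj, my_hash(obj));
 *     rcu_read_unlock();
 *
 *     qht_remove(&ht, obj, my_hash(obj));    // free obj only after a grace
 *                                            // period, e.g. via call_rcu()
 *     qht_destroy(&ht);
 */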