// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 */

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U

union nested_table {
	union nested_table __rcu *table;
	struct rhash_lock_head *bucket;
};

static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	if (!debug_locks)
		return 1;
	if (unlikely(tbl->nest))
		return 1;
	return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif

static void nested_table_free(union nested_table *ntbl, unsigned int size)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	const unsigned int len = 1 << shift;
	unsigned int i;

	ntbl = rcu_dereference_raw(ntbl->table);
	if (!ntbl)
		return;

	if (size > len) {
		size >>= shift;
		for (i = 0; i < len; i++)
			nested_table_free(ntbl + i, size);
	}

	kfree(ntbl);
}

static void nested_bucket_table_free(const struct bucket_table *tbl)
{
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int len = 1 << tbl->nest;
	union nested_table *ntbl;
	unsigned int i;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);

	for (i = 0; i < len; i++)
		nested_table_free(ntbl + i, size);

	kfree(ntbl);
}

static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl->nest)
		nested_bucket_table_free(tbl);

	kvfree(tbl);
}

static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

static union nested_table *nested_table_alloc(struct rhashtable *ht,
					      union nested_table __rcu **prev,
					      bool leaf)
{
	union nested_table *ntbl;
	int i;

	ntbl = rcu_dereference(*prev);
	if (ntbl)
		return ntbl;

	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);

	if (ntbl && leaf) {
		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
			INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
	}

	if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
		return ntbl;
	/* Raced with another thread. */
	kfree(ntbl);
	return rcu_dereference(*prev);
}

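/*
 * Worked example of the nested-table geometry (assuming the common case
 * of 4 KiB pages and 8-byte pointers, i.e. shift == 9 and 512 slots per
 * page; other configurations scale accordingly):
 *
 * A 65536-bucket table that has to fall back to nesting gets
 * tbl->nest == 7.  tbl->buckets[0] then points at a page-sized top
 * level of which the first 128 entries are used, each referring to a
 * 512-bucket leaf page.  A bucket index is split into (hash & 127) to
 * select the top-level entry and (hash >> 7) to index within the leaf,
 * mirroring the arithmetic in __rht_bucket_nested().
 */
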
static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
						      size_t nbuckets,
						      gfp_t gfp)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	struct bucket_table *tbl;
	size_t size;

	if (nbuckets < (1 << (shift + 1)))
		return NULL;

	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);

	tbl = kzalloc(size, gfp);
	if (!tbl)
		return NULL;

	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
				false)) {
		kfree(tbl);
		return NULL;
	}

	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;

	return tbl;
}

static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;
	static struct lock_class_key __key;

	tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);

	size = nbuckets;

	if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
		nbuckets = 0;
	}

	if (tbl == NULL)
		return NULL;

	lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);

	tbl->size = size;

	rcu_head_init(&tbl->rcu);
	INIT_LIST_HEAD(&tbl->walkers);

	tbl->hash_rnd = get_random_u32();

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i]);

	return tbl;
}

static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
						  struct bucket_table *tbl)
{
	struct bucket_table *new_tbl;

	do {
		new_tbl = tbl;
		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	} while (tbl);

	return new_tbl;
}

static int rhashtable_rehash_one(struct rhashtable *ht,
				 struct rhash_lock_head **bkt,
				 unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
	int err = -EAGAIN;
	struct rhash_head *head, *next, *entry;
	struct rhash_head __rcu **pprev = NULL;
	unsigned int new_hash;

	if (new_tbl->nest)
		goto out;

	err = -ENOENT;

	rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
			  old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash],
			SINGLE_DEPTH_NESTING);

	head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);

	RCU_INIT_POINTER(entry->next, head);

	rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);

	if (pprev)
		rcu_assign_pointer(*pprev, next);
	else
		/* Need to preserve the bit lock. */
		rht_assign_locked(bkt, next);

out:
	return err;
}

static int rhashtable_rehash_chain(struct rhashtable *ht,
				   unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct rhash_lock_head **bkt = rht_bucket_var(old_tbl, old_hash);
	int err;

	if (!bkt)
		return 0;
	rht_lock(old_tbl, bkt);

	while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
		;

	if (err == -ENOENT)
		err = 0;
	rht_unlock(old_tbl, bkt);

	return err;
}

static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 * As cmpxchg() provides strong barriers, we do not need
	 * rcu_assign_pointer().
	 */

	if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
		    new_tbl) != NULL)
		return -EEXIST;

	return 0;
}

static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;
	int err;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
		err = rhashtable_rehash_chain(ht, old_hash);
		if (err)
			return err;
		cond_resched();
	}

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 * We do this inside the locked region so that
	 * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
	 * to check if it should not re-link the table.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
	spin_unlock(&ht->lock);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}

static int rhashtable_rehash_alloc(struct rhashtable *ht,
				   struct bucket_table *old_tbl,
				   unsigned int size)
{
	struct bucket_table *new_tbl;
	int err;

	ASSERT_RHT_MUTEX(ht);

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}

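/*
 * Rehash flow in brief: rhashtable_rehash_attach() links the new table
 * through old_tbl->future_tbl so that new insertions go there right away,
 * rhashtable_rehash_chain()/rhashtable_rehash_one() then move entries one
 * bucket at a time under the per-bucket bit locks, and
 * rhashtable_rehash_table() finally publishes the new table in ht->tbl
 * and frees the old one after an RCU grace period.
 */
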
/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht:		the hash table to shrink
 *
 * This function shrinks the hash table to fit, i.e., the smallest
 * size that would not cause it to expand right away automatically.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * The caller must ensure that no concurrent table mutations take place.
 * It is however valid to have concurrent lookups if they are RCU protected.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int nelems = atomic_read(&ht->nelems);
	unsigned int size = 0;

	if (nelems)
		size = roundup_pow_of_two(nelems * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	return rhashtable_rehash_alloc(ht, old_tbl, size);
}

static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;
	int err = 0;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);

	tbl = rht_dereference(ht->tbl, ht);
	tbl = rhashtable_last_table(ht, tbl);

	if (rht_grow_above_75(ht, tbl))
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
		err = rhashtable_shrink(ht);
	else if (tbl->nest)
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);

	if (!err || err == -EEXIST) {
		int nerr;

		nerr = rhashtable_rehash_table(ht);
		err = err ?: nerr;
	}

	mutex_unlock(&ht->mutex);

	if (err)
		schedule_work(&ht->run_work);
}

static int rhashtable_insert_rehash(struct rhashtable *ht,
				    struct bucket_table *tbl)
{
	struct bucket_table *old_tbl;
	struct bucket_table *new_tbl;
	unsigned int size;
	int err;

	old_tbl = rht_dereference_rcu(ht->tbl, ht);

	size = tbl->size;

	err = -EBUSY;

	if (rht_grow_above_75(ht, tbl))
		size *= 2;
	/* Do not schedule more than one rehash */
	else if (old_tbl != tbl)
		goto fail;

	err = -ENOMEM;

	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
	if (new_tbl == NULL)
		goto fail;

	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
	if (err) {
		bucket_table_free(new_tbl);
		if (err == -EEXIST)
			err = 0;
	} else
		schedule_work(&ht->run_work);

	return err;

fail:
	/* Do not fail the insert if someone else did a rehash. */
	if (likely(rcu_access_pointer(tbl->future_tbl)))
		return 0;

	/* Schedule async rehash to retry allocation in process context. */
	if (err == -ENOMEM)
		schedule_work(&ht->run_work);

	return err;
}

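/*
 * The resize triggers used by rht_deferred_worker() and
 * rhashtable_insert_rehash() above and by the insert slow path below,
 * illustrated for a 64-bucket table (the rht_grow_*() and rht_shrink_*()
 * helpers live in <linux/rhashtable.h>; exact behaviour also depends on
 * the configured max_size/min_size):
 *
 *   rht_grow_above_75():   more than 48 entries: schedule a grow to 128
 *   rht_grow_above_100():  more than 64 entries: the insert slow path
 *                          forces a rehash before the insert can proceed
 *   rht_grow_above_max():  ht->max_elems reached: the insert fails with
 *                          -E2BIG
 *   rht_shrink_below_30(): fewer than ~19 entries: shrink, if
 *                          automatic_shrinking is set and size > min_size
 */
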
static void *rhashtable_lookup_one(struct rhashtable *ht,
				   struct rhash_lock_head **bkt,
				   struct bucket_table *tbl, unsigned int hash,
				   const void *key, struct rhash_head *obj)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_head __rcu **pprev = NULL;
	struct rhash_head *head;
	int elasticity;

	elasticity = RHT_ELASTICITY;
	rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
		struct rhlist_head *list;
		struct rhlist_head *plist;

		elasticity--;
		if (!key ||
		    (ht->p.obj_cmpfn ?
		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
		     rhashtable_compare(&arg, rht_obj(ht, head)))) {
			pprev = &head->next;
			continue;
		}

		if (!ht->rhlist)
			return rht_obj(ht, head);

		list = container_of(obj, struct rhlist_head, rhead);
		plist = container_of(head, struct rhlist_head, rhead);

		RCU_INIT_POINTER(list->next, plist);
		head = rht_dereference_bucket(head->next, tbl, hash);
		RCU_INIT_POINTER(list->rhead.next, head);
		if (pprev)
			rcu_assign_pointer(*pprev, obj);
		else
			/* Need to preserve the bit lock */
			rht_assign_locked(bkt, obj);

		return NULL;
	}

	if (elasticity <= 0)
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-ENOENT);
}

static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
						  struct rhash_lock_head **bkt,
						  struct bucket_table *tbl,
						  unsigned int hash,
						  struct rhash_head *obj,
						  void *data)
{
	struct bucket_table *new_tbl;
	struct rhash_head *head;

	if (!IS_ERR_OR_NULL(data))
		return ERR_PTR(-EEXIST);

	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (new_tbl)
		return new_tbl;

	if (PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	if (unlikely(rht_grow_above_max(ht, tbl)))
		return ERR_PTR(-E2BIG);

	if (unlikely(rht_grow_above_100(ht, tbl)))
		return ERR_PTR(-EAGAIN);

	head = rht_ptr(bkt, tbl, hash);

	RCU_INIT_POINTER(obj->next, head);
	if (ht->rhlist) {
		struct rhlist_head *list;

		list = container_of(obj, struct rhlist_head, rhead);
		RCU_INIT_POINTER(list->next, NULL);
	}

	/* bkt is always the head of the list, so it holds
	 * the lock, which we need to preserve
	 */
	rht_assign_locked(bkt, obj);

	atomic_inc(&ht->nelems);
	if (rht_grow_above_75(ht, tbl))
		schedule_work(&ht->run_work);

	return NULL;
}

static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
				   struct rhash_head *obj)
{
	struct bucket_table *new_tbl;
	struct bucket_table *tbl;
	struct rhash_lock_head **bkt;
	unsigned int hash;
	void *data;

	new_tbl = rcu_dereference(ht->tbl);

	do {
		tbl = new_tbl;
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		if (rcu_access_pointer(tbl->future_tbl))
			/* Failure is OK */
			bkt = rht_bucket_var(tbl, hash);
		else
			bkt = rht_bucket_insert(ht, tbl, hash);
		if (bkt == NULL) {
			new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
			data = ERR_PTR(-EAGAIN);
		} else {
			rht_lock(tbl, bkt);
			data = rhashtable_lookup_one(ht, bkt, tbl,
						     hash, key, obj);
			new_tbl = rhashtable_insert_one(ht, bkt, tbl,
							hash, obj, data);
			if (PTR_ERR(new_tbl) != -EEXIST)
				data = ERR_CAST(new_tbl);

			rht_unlock(tbl, bkt);
		}
	} while (!IS_ERR_OR_NULL(new_tbl));

	if (PTR_ERR(data) == -EAGAIN)
		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
			       -EAGAIN);

	return data;
}

void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
			     struct rhash_head *obj)
{
	void *data;

	do {
		rcu_read_lock();
		data = rhashtable_try_insert(ht, key, obj);
		rcu_read_unlock();
	} while (PTR_ERR(data) == -EAGAIN);

	return data;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);

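/*
 * Typical caller pattern (sketch).  The fast-path helpers used below are
 * the inline wrappers from <linux/rhashtable.h>; they fall back to
 * rhashtable_insert_slow() when a resize is pending or the bucket cannot
 * be used directly.  "my_obj", "my_params", "node" and "key" are
 * illustrative names and the error handling is trimmed:
 *
 *	struct my_obj *obj;
 *	int err;
 *
 *	err = rhashtable_insert_fast(&ht, &obj->node, my_params);
 *
 *	rcu_read_lock();
 *	obj = rhashtable_lookup(&ht, &key, my_params);
 *	if (obj)
 *		...;	(only valid inside this RCU read-side section)
 *	rcu_read_unlock();
 *
 *	err = rhashtable_remove_fast(&ht, &obj->node, my_params);
 */
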
/**
 * rhashtable_walk_enter - Initialise an iterator
 * @ht:		Table to walk over
 * @iter:	Hash table Iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice. Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may be called from any process context, including
 * non-preemptible context, but cannot be called from softirq or
 * hardirq context.
 *
 * You must call rhashtable_walk_exit after this function returns.
 */
void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;
	iter->end_of_table = 0;

	spin_lock(&ht->lock);
	iter->walker.tbl =
		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
	spin_unlock(&ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_enter);

/**
 * rhashtable_walk_exit - Free an iterator
 * @iter:	Hash table Iterator
 *
 * This function frees resources allocated by rhashtable_walk_enter.
 */
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
	spin_lock(&iter->ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&iter->ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);

/**
 * rhashtable_walk_start_check - Start a hash table walk
 * @iter:	Hash table iterator
 *
 * Start a hash table walk at the current iterator position. Note that we take
 * the RCU lock in all cases including when we return an error. So you must
 * always call rhashtable_walk_stop to clean up.
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may use it immediately
 * by calling rhashtable_walk_next.
 *
 * rhashtable_walk_start is defined as an inline variant that returns
 * void. This is preferred in cases where the caller would ignore
 * resize events and always continue.
 */
int rhashtable_walk_start_check(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;
	bool rhlist = ht->rhlist;

	rcu_read_lock();

	spin_lock(&ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&ht->lock);

	if (iter->end_of_table)
		return 0;
	if (!iter->walker.tbl) {
		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
		iter->slot = 0;
		iter->skip = 0;
		return -EAGAIN;
	}

	if (iter->p && !rhlist) {
		/*
		 * We need to validate that 'p' is still in the table, and
		 * if so, update 'skip'
		 */
		struct rhash_head *p;
		int skip = 0;
		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
			skip++;
			if (p == iter->p) {
				iter->skip = skip;
				goto found;
			}
		}
		iter->p = NULL;
	} else if (iter->p && rhlist) {
		/* Need to validate that 'list' is still in the table, and
		 * if so, update 'skip' and 'p'.
		 */
		struct rhash_head *p;
		struct rhlist_head *list;
		int skip = 0;
		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
			for (list = container_of(p, struct rhlist_head, rhead);
			     list;
			     list = rcu_dereference(list->next)) {
				skip++;
				if (list == iter->list) {
					iter->p = p;
					iter->skip = skip;
					goto found;
				}
			}
		}
		iter->p = NULL;
	}
found:
	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);

/**
 * __rhashtable_walk_find_next - Find the next element in a table (or the first
 * one in case of a new walk).
 *
 * @iter:	Hash table iterator
 *
 * Returns the found object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if a resize event occurred.
 */
static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker.tbl;
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (!tbl)
		return NULL;

	for (; iter->slot < tbl->size; iter->slot++) {
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (rhlist) {
				list = container_of(p, struct rhlist_head,
						    rhead);
				do {
					if (!skip)
						goto next;
					skip--;
					list = rcu_dereference(list->next);
				} while (list);

				continue;
			}
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		iter->skip = 0;
	}

	iter->p = NULL;

	/* Ensure we see any new tables. */
	smp_rmb();

	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker.tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	} else {
		iter->end_of_table = true;
	}

	return NULL;
}

/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter:	Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop when you are finished
 * with the walk.
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (p) {
		if (!rhlist || !(list = rcu_dereference(list->next))) {
			p = rcu_dereference(p->next);
			list = container_of(p, struct rhlist_head, rhead);
		}
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		/* At the end of this slot, switch to next one and then find
		 * next entry from that point.
		 */
		iter->skip = 0;
		iter->slot++;
	}

	return __rhashtable_walk_find_next(iter);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);

/**
 * rhashtable_walk_peek - Return the next object but don't advance the iterator
 * @iter:	Hash table iterator
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_peek(struct rhashtable_iter *iter)
{
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;

	if (p)
		return rht_obj(ht, ht->rhlist ? &list->rhead : p);

	/* No object found in current iter, find next one in the table. */

	if (iter->skip) {
		/* A nonzero skip value points to the next entry in the table
		 * beyond the last one that was found. Decrement skip so
		 * we find the current value. __rhashtable_walk_find_next
		 * will restore the original value of skip assuming that
		 * the table hasn't changed.
		 */
		iter->skip--;
	}

	return __rhashtable_walk_find_next(iter);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_peek);

/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter:	Hash table iterator
 *
 * Finish a hash table walk. Does not reset the iterator to the start of the
 * hash table.
 */
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker.tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	spin_lock(&ht->lock);
	if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
		/* This bucket table is being freed, don't re-link it. */
		iter->walker.tbl = NULL;
	else
		list_add(&iter->walker.list, &tbl->walkers);
	spin_unlock(&ht->lock);

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);

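/*
 * Typical walker usage (sketch; "my_obj" is an illustrative type and
 * error handling is trimmed).  A concurrent resize makes
 * rhashtable_walk_next() return ERR_PTR(-EAGAIN) and rewinds the
 * iterator, so the loop below simply continues; objects seen before the
 * resize may then be seen a second time:
 *
 *	struct rhashtable_iter iter;
 *	struct my_obj *obj;
 *
 *	rhashtable_walk_enter(&ht, &iter);
 *	rhashtable_walk_start(&iter);
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;
 *			break;
 *		}
 *		...process obj...
 *	}
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */
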
static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	size_t retsize;

	if (params->nelem_hint)
		retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
			      (unsigned long)params->min_size);
	else
		retsize = max(HASH_DEFAULT_SIZE,
			      (unsigned long)params->min_size);

	return retsize;
}

static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}

/**
 * rhashtable_init - initialize a new hash table
 * @ht:		hash table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *	int			key;
 *	void *			my_member;
 *	struct rhash_head	node;
 * };
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.key_offset = offsetof(struct test_obj, key),
 *	.key_len = sizeof(int),
 *	.hashfn = jhash,
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *	[...]
 *	struct rhash_head	node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
 * {
 *	struct test_obj *obj = data;
 *
 *	return [... hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.hashfn = jhash,
 *	.obj_hashfn = my_hash_fn,
 * };
 */
int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	memcpy(&ht->p, params, sizeof(*params));

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	/* Cap total entries at 2^31 to avoid nelems overflow. */
	ht->max_elems = 1u << 31;

	if (params->max_size) {
		ht->p.max_size = rounddown_pow_of_two(params->max_size);
		if (ht->p.max_size < ht->max_elems / 2)
			ht->max_elems = ht->p.max_size * 2;
	}

	ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);

	size = rounded_hashtable_size(&ht->p);

	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	/*
	 * This is API initialization and thus we need to guarantee the
	 * initial rhashtable allocation. Upon failure, retry with the
	 * smallest possible size with __GFP_NOFAIL semantics.
	 */
	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (unlikely(tbl == NULL)) {
		size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
		tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
	}

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init);

/**
 * rhltable_init - initialize a new hash list table
 * @hlt:	hash list table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash list table.
 *
 * See documentation for rhashtable_init.
 */
int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
{
	int err;

	err = rhashtable_init(&hlt->ht, params);
	hlt->ht.rhlist = true;
	return err;
}
EXPORT_SYMBOL_GPL(rhltable_init);

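/*
 * Example (sketch) of storing several objects that share one key in an
 * rhltable.  The structure, field and variable names are illustrative;
 * rhltable_insert(), rhltable_lookup() and rhl_for_each_entry_rcu() are
 * declared in <linux/rhashtable.h>:
 *
 *	struct multi_obj {
 *		u32			key;
 *		struct rhlist_head	list;
 *	};
 *
 *	static const struct rhashtable_params multi_params = {
 *		.key_len	= sizeof(u32),
 *		.key_offset	= offsetof(struct multi_obj, key),
 *		.head_offset	= offsetof(struct multi_obj, list),
 *	};
 *
 *	rhltable_init(&hlt, &multi_params);
 *	err = rhltable_insert(&hlt, &obj->list, multi_params);
 *
 *	rcu_read_lock();
 *	head = rhltable_lookup(&hlt, &key, multi_params);
 *	rhl_for_each_entry_rcu(obj, pos, head, list)
 *		...each obj here carries the looked-up key...
 *	rcu_read_unlock();
 */
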
static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
				void (*free_fn)(void *ptr, void *arg),
				void *arg)
{
	struct rhlist_head *list;

	if (!ht->rhlist) {
		free_fn(rht_obj(ht, obj), arg);
		return;
	}

	list = container_of(obj, struct rhlist_head, rhead);
	do {
		obj = &list->rhead;
		list = rht_dereference(list->next, ht);
		free_fn(rht_obj(ht, obj), arg);
	} while (list);
}

/**
 * rhashtable_free_and_destroy - free elements and destroy hash table
 * @ht:		the hash table to destroy
 * @free_fn:	callback to release resources of element
 * @arg:	pointer passed to free_fn
 *
 * Stops any async resize that may be in progress. If defined, invokes free_fn
 * for each element to release resources. Please note that RCU protected
 * readers may still be accessing the elements. Releasing of resources
 * must occur in a compatible manner. Then frees the bucket array.
 *
 * This function will eventually sleep to wait for an async resize
 * to complete. The caller is responsible for ensuring that no further
 * write operations occur in parallel.
 */
void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg)
{
	struct bucket_table *tbl, *next_tbl;
	unsigned int i;

	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	tbl = rht_dereference(ht->tbl, ht);
restart:
	if (free_fn) {
		for (i = 0; i < tbl->size; i++) {
			struct rhash_head *pos, *next;

			cond_resched();
			for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL;
			     !rht_is_a_nulls(pos);
			     pos = next,
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL)
				rhashtable_free_one(ht, pos, free_fn, arg);
		}
	}

	next_tbl = rht_dereference(tbl->future_tbl, ht);
	bucket_table_free(tbl);
	if (next_tbl) {
		tbl = next_tbl;
		goto restart;
	}
	mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);

void rhashtable_destroy(struct rhashtable *ht)
{
	return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);

struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
					     unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int subhash = hash;
	union nested_table *ntbl;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
	subhash >>= tbl->nest;

	while (ntbl && size > (1 << shift)) {
		index = subhash & ((1 << shift) - 1);
		ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
						  tbl, hash);
		size >>= shift;
		subhash >>= shift;
	}

	if (!ntbl)
		return NULL;

	return &ntbl[subhash].bucket;
}
EXPORT_SYMBOL_GPL(__rht_bucket_nested);

struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
					   unsigned int hash)
{
	static struct rhash_lock_head *rhnull;

	if (!rhnull)
		INIT_RHT_NULLS_HEAD(rhnull);
	return __rht_bucket_nested(tbl, hash) ?: &rhnull;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested);

struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
						  struct bucket_table *tbl,
						  unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	union nested_table *ntbl;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	hash >>= tbl->nest;
	ntbl = nested_table_alloc(ht, &ntbl[index].table,
				  size <= (1 << shift));

	while (ntbl && size > (1 << shift)) {
		index = hash & ((1 << shift) - 1);
		size >>= shift;
		hash >>= shift;
		ntbl = nested_table_alloc(ht, &ntbl[index].table,
					  size <= (1 << shift));
	}

	if (!ntbl)
		return NULL;

	return &ntbl[hash].bucket;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);