1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 64 RADIX_TREE_MAP_SHIFT)) 65 66 /* 67 * The height_to_maxindex array needs to be one deeper than the maximum 68 * path as height 0 holds only 1 entry. 69 */ 70 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; 71 72 /* 73 * Radix tree node cache. 74 */ 75 static struct kmem_cache *radix_tree_node_cachep; 76 77 /* 78 * Per-cpu pool of preloaded nodes 79 */ 80 struct radix_tree_preload { 81 int nr; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 83 }; 84 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 85 86 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 87 { 88 return root->gfp_mask & __GFP_BITS_MASK; 89 } 90 91 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 92 int offset) 93 { 94 __set_bit(offset, node->tags[tag]); 95 } 96 97 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 98 int offset) 99 { 100 __clear_bit(offset, node->tags[tag]); 101 } 102 103 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 104 int offset) 105 { 106 return test_bit(offset, node->tags[tag]); 107 } 108 109 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 110 { 111 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 112 } 113 114 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 115 { 116 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 117 } 118 119 static inline void root_tag_clear_all(struct radix_tree_root *root) 120 { 121 root->gfp_mask &= __GFP_BITS_MASK; 122 } 123 124 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 125 { 126 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 127 } 128 129 /* 130 * Returns 1 if any slot in the node has this tag set. 131 * Otherwise returns 0. 132 */ 133 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 134 { 135 int idx; 136 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 137 if (node->tags[tag][idx]) 138 return 1; 139 } 140 return 0; 141 } 142 /* 143 * This assumes that the caller has performed appropriate preallocation, and 144 * that the caller has pinned this thread of control to the current CPU. 145 */ 146 static struct radix_tree_node * 147 radix_tree_node_alloc(struct radix_tree_root *root) 148 { 149 struct radix_tree_node *ret = NULL; 150 gfp_t gfp_mask = root_gfp_mask(root); 151 152 if (!(gfp_mask & __GFP_WAIT)) { 153 struct radix_tree_preload *rtp; 154 155 /* 156 * Provided the caller has preloaded here, we will always 157 * succeed in getting a node here (and never reach 158 * kmem_cache_alloc) 159 */ 160 rtp = &__get_cpu_var(radix_tree_preloads); 161 if (rtp->nr) { 162 ret = rtp->nodes[rtp->nr - 1]; 163 rtp->nodes[rtp->nr - 1] = NULL; 164 rtp->nr--; 165 } 166 } 167 if (ret == NULL) 168 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 169 170 BUG_ON(radix_tree_is_indirect_ptr(ret)); 171 return ret; 172 } 173 174 static void radix_tree_node_rcu_free(struct rcu_head *head) 175 { 176 struct radix_tree_node *node = 177 container_of(head, struct radix_tree_node, rcu_head); 178 179 /* 180 * must only free zeroed nodes into the slab. radix_tree_shrink 181 * can leave us with a non-NULL entry in the first slot, so clear 182 * that here to make sure. 183 */ 184 tag_clear(node, 0, 0); 185 tag_clear(node, 1, 0); 186 node->slots[0] = NULL; 187 node->count = 0; 188 189 kmem_cache_free(radix_tree_node_cachep, node); 190 } 191 192 static inline void 193 radix_tree_node_free(struct radix_tree_node *node) 194 { 195 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 196 } 197 198 /* 199 * Load up this CPU's radix_tree_node buffer with sufficient objects to 200 * ensure that the addition of a single element in the tree cannot fail. On 201 * success, return zero, with preemption disabled. On error, return -ENOMEM 202 * with preemption not disabled. 203 * 204 * To make use of this facility, the radix tree must be initialised without 205 * __GFP_WAIT being passed to INIT_RADIX_TREE(). 206 */ 207 int radix_tree_preload(gfp_t gfp_mask) 208 { 209 struct radix_tree_preload *rtp; 210 struct radix_tree_node *node; 211 int ret = -ENOMEM; 212 213 preempt_disable(); 214 rtp = &__get_cpu_var(radix_tree_preloads); 215 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 216 preempt_enable(); 217 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 218 if (node == NULL) 219 goto out; 220 preempt_disable(); 221 rtp = &__get_cpu_var(radix_tree_preloads); 222 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 223 rtp->nodes[rtp->nr++] = node; 224 else 225 kmem_cache_free(radix_tree_node_cachep, node); 226 } 227 ret = 0; 228 out: 229 return ret; 230 } 231 EXPORT_SYMBOL(radix_tree_preload); 232 233 /* 234 * Return the maximum key which can be store into a 235 * radix tree with height HEIGHT. 236 */ 237 static inline unsigned long radix_tree_maxindex(unsigned int height) 238 { 239 return height_to_maxindex[height]; 240 } 241 242 /* 243 * Extend a radix tree so it can store key @index. 244 */ 245 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 246 { 247 struct radix_tree_node *node; 248 unsigned int height; 249 int tag; 250 251 /* Figure out what the height should be. */ 252 height = root->height + 1; 253 while (index > radix_tree_maxindex(height)) 254 height++; 255 256 if (root->rnode == NULL) { 257 root->height = height; 258 goto out; 259 } 260 261 do { 262 unsigned int newheight; 263 if (!(node = radix_tree_node_alloc(root))) 264 return -ENOMEM; 265 266 /* Increase the height. */ 267 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 268 269 /* Propagate the aggregated tag info into the new root */ 270 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 271 if (root_tag_get(root, tag)) 272 tag_set(node, tag, 0); 273 } 274 275 newheight = root->height+1; 276 node->height = newheight; 277 node->count = 1; 278 node = radix_tree_ptr_to_indirect(node); 279 rcu_assign_pointer(root->rnode, node); 280 root->height = newheight; 281 } while (height > root->height); 282 out: 283 return 0; 284 } 285 286 /** 287 * radix_tree_insert - insert into a radix tree 288 * @root: radix tree root 289 * @index: index key 290 * @item: item to insert 291 * 292 * Insert an item into the radix tree at position @index. 293 */ 294 int radix_tree_insert(struct radix_tree_root *root, 295 unsigned long index, void *item) 296 { 297 struct radix_tree_node *node = NULL, *slot; 298 unsigned int height, shift; 299 int offset; 300 int error; 301 302 BUG_ON(radix_tree_is_indirect_ptr(item)); 303 304 /* Make sure the tree is high enough. */ 305 if (index > radix_tree_maxindex(root->height)) { 306 error = radix_tree_extend(root, index); 307 if (error) 308 return error; 309 } 310 311 slot = radix_tree_indirect_to_ptr(root->rnode); 312 313 height = root->height; 314 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 315 316 offset = 0; /* uninitialised var warning */ 317 while (height > 0) { 318 if (slot == NULL) { 319 /* Have to add a child node. */ 320 if (!(slot = radix_tree_node_alloc(root))) 321 return -ENOMEM; 322 slot->height = height; 323 if (node) { 324 rcu_assign_pointer(node->slots[offset], slot); 325 node->count++; 326 } else 327 rcu_assign_pointer(root->rnode, 328 radix_tree_ptr_to_indirect(slot)); 329 } 330 331 /* Go a level down */ 332 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 333 node = slot; 334 slot = node->slots[offset]; 335 shift -= RADIX_TREE_MAP_SHIFT; 336 height--; 337 } 338 339 if (slot != NULL) 340 return -EEXIST; 341 342 if (node) { 343 node->count++; 344 rcu_assign_pointer(node->slots[offset], item); 345 BUG_ON(tag_get(node, 0, offset)); 346 BUG_ON(tag_get(node, 1, offset)); 347 } else { 348 rcu_assign_pointer(root->rnode, item); 349 BUG_ON(root_tag_get(root, 0)); 350 BUG_ON(root_tag_get(root, 1)); 351 } 352 353 return 0; 354 } 355 EXPORT_SYMBOL(radix_tree_insert); 356 357 /* 358 * is_slot == 1 : search for the slot. 359 * is_slot == 0 : search for the node. 360 */ 361 static void *radix_tree_lookup_element(struct radix_tree_root *root, 362 unsigned long index, int is_slot) 363 { 364 unsigned int height, shift; 365 struct radix_tree_node *node, **slot; 366 367 node = rcu_dereference(root->rnode); 368 if (node == NULL) 369 return NULL; 370 371 if (!radix_tree_is_indirect_ptr(node)) { 372 if (index > 0) 373 return NULL; 374 return is_slot ? (void *)&root->rnode : node; 375 } 376 node = radix_tree_indirect_to_ptr(node); 377 378 height = node->height; 379 if (index > radix_tree_maxindex(height)) 380 return NULL; 381 382 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 383 384 do { 385 slot = (struct radix_tree_node **) 386 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 387 node = rcu_dereference(*slot); 388 if (node == NULL) 389 return NULL; 390 391 shift -= RADIX_TREE_MAP_SHIFT; 392 height--; 393 } while (height > 0); 394 395 return is_slot ? (void *)slot:node; 396 } 397 398 /** 399 * radix_tree_lookup_slot - lookup a slot in a radix tree 400 * @root: radix tree root 401 * @index: index key 402 * 403 * Returns: the slot corresponding to the position @index in the 404 * radix tree @root. This is useful for update-if-exists operations. 405 * 406 * This function can be called under rcu_read_lock iff the slot is not 407 * modified by radix_tree_replace_slot, otherwise it must be called 408 * exclusive from other writers. Any dereference of the slot must be done 409 * using radix_tree_deref_slot. 410 */ 411 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 412 { 413 return (void **)radix_tree_lookup_element(root, index, 1); 414 } 415 EXPORT_SYMBOL(radix_tree_lookup_slot); 416 417 /** 418 * radix_tree_lookup - perform lookup operation on a radix tree 419 * @root: radix tree root 420 * @index: index key 421 * 422 * Lookup the item at the position @index in the radix tree @root. 423 * 424 * This function can be called under rcu_read_lock, however the caller 425 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 426 * them safely). No RCU barriers are required to access or modify the 427 * returned item, however. 428 */ 429 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 430 { 431 return radix_tree_lookup_element(root, index, 0); 432 } 433 EXPORT_SYMBOL(radix_tree_lookup); 434 435 /** 436 * radix_tree_tag_set - set a tag on a radix tree node 437 * @root: radix tree root 438 * @index: index key 439 * @tag: tag index 440 * 441 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 442 * corresponding to @index in the radix tree. From 443 * the root all the way down to the leaf node. 444 * 445 * Returns the address of the tagged item. Setting a tag on a not-present 446 * item is a bug. 447 */ 448 void *radix_tree_tag_set(struct radix_tree_root *root, 449 unsigned long index, unsigned int tag) 450 { 451 unsigned int height, shift; 452 struct radix_tree_node *slot; 453 454 height = root->height; 455 BUG_ON(index > radix_tree_maxindex(height)); 456 457 slot = radix_tree_indirect_to_ptr(root->rnode); 458 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 459 460 while (height > 0) { 461 int offset; 462 463 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 464 if (!tag_get(slot, tag, offset)) 465 tag_set(slot, tag, offset); 466 slot = slot->slots[offset]; 467 BUG_ON(slot == NULL); 468 shift -= RADIX_TREE_MAP_SHIFT; 469 height--; 470 } 471 472 /* set the root's tag bit */ 473 if (slot && !root_tag_get(root, tag)) 474 root_tag_set(root, tag); 475 476 return slot; 477 } 478 EXPORT_SYMBOL(radix_tree_tag_set); 479 480 /** 481 * radix_tree_tag_clear - clear a tag on a radix tree node 482 * @root: radix tree root 483 * @index: index key 484 * @tag: tag index 485 * 486 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 487 * corresponding to @index in the radix tree. If 488 * this causes the leaf node to have no tags set then clear the tag in the 489 * next-to-leaf node, etc. 490 * 491 * Returns the address of the tagged item on success, else NULL. ie: 492 * has the same return value and semantics as radix_tree_lookup(). 493 */ 494 void *radix_tree_tag_clear(struct radix_tree_root *root, 495 unsigned long index, unsigned int tag) 496 { 497 /* 498 * The radix tree path needs to be one longer than the maximum path 499 * since the "list" is null terminated. 500 */ 501 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 502 struct radix_tree_node *slot = NULL; 503 unsigned int height, shift; 504 505 height = root->height; 506 if (index > radix_tree_maxindex(height)) 507 goto out; 508 509 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 510 pathp->node = NULL; 511 slot = radix_tree_indirect_to_ptr(root->rnode); 512 513 while (height > 0) { 514 int offset; 515 516 if (slot == NULL) 517 goto out; 518 519 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 520 pathp[1].offset = offset; 521 pathp[1].node = slot; 522 slot = slot->slots[offset]; 523 pathp++; 524 shift -= RADIX_TREE_MAP_SHIFT; 525 height--; 526 } 527 528 if (slot == NULL) 529 goto out; 530 531 while (pathp->node) { 532 if (!tag_get(pathp->node, tag, pathp->offset)) 533 goto out; 534 tag_clear(pathp->node, tag, pathp->offset); 535 if (any_tag_set(pathp->node, tag)) 536 goto out; 537 pathp--; 538 } 539 540 /* clear the root's tag bit */ 541 if (root_tag_get(root, tag)) 542 root_tag_clear(root, tag); 543 544 out: 545 return slot; 546 } 547 EXPORT_SYMBOL(radix_tree_tag_clear); 548 549 /** 550 * radix_tree_tag_get - get a tag on a radix tree node 551 * @root: radix tree root 552 * @index: index key 553 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 554 * 555 * Return values: 556 * 557 * 0: tag not present or not set 558 * 1: tag set 559 */ 560 int radix_tree_tag_get(struct radix_tree_root *root, 561 unsigned long index, unsigned int tag) 562 { 563 unsigned int height, shift; 564 struct radix_tree_node *node; 565 int saw_unset_tag = 0; 566 567 /* check the root's tag bit */ 568 if (!root_tag_get(root, tag)) 569 return 0; 570 571 node = rcu_dereference(root->rnode); 572 if (node == NULL) 573 return 0; 574 575 if (!radix_tree_is_indirect_ptr(node)) 576 return (index == 0); 577 node = radix_tree_indirect_to_ptr(node); 578 579 height = node->height; 580 if (index > radix_tree_maxindex(height)) 581 return 0; 582 583 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 584 585 for ( ; ; ) { 586 int offset; 587 588 if (node == NULL) 589 return 0; 590 591 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 592 593 /* 594 * This is just a debug check. Later, we can bale as soon as 595 * we see an unset tag. 596 */ 597 if (!tag_get(node, tag, offset)) 598 saw_unset_tag = 1; 599 if (height == 1) { 600 int ret = tag_get(node, tag, offset); 601 602 BUG_ON(ret && saw_unset_tag); 603 return !!ret; 604 } 605 node = rcu_dereference(node->slots[offset]); 606 shift -= RADIX_TREE_MAP_SHIFT; 607 height--; 608 } 609 } 610 EXPORT_SYMBOL(radix_tree_tag_get); 611 612 /** 613 * radix_tree_next_hole - find the next hole (not-present entry) 614 * @root: tree root 615 * @index: index key 616 * @max_scan: maximum range to search 617 * 618 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest 619 * indexed hole. 620 * 621 * Returns: the index of the hole if found, otherwise returns an index 622 * outside of the set specified (in which case 'return - index >= max_scan' 623 * will be true). In rare cases of index wrap-around, 0 will be returned. 624 * 625 * radix_tree_next_hole may be called under rcu_read_lock. However, like 626 * radix_tree_gang_lookup, this will not atomically search a snapshot of 627 * the tree at a single point in time. For example, if a hole is created 628 * at index 5, then subsequently a hole is created at index 10, 629 * radix_tree_next_hole covering both indexes may return 10 if called 630 * under rcu_read_lock. 631 */ 632 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 633 unsigned long index, unsigned long max_scan) 634 { 635 unsigned long i; 636 637 for (i = 0; i < max_scan; i++) { 638 if (!radix_tree_lookup(root, index)) 639 break; 640 index++; 641 if (index == 0) 642 break; 643 } 644 645 return index; 646 } 647 EXPORT_SYMBOL(radix_tree_next_hole); 648 649 /** 650 * radix_tree_prev_hole - find the prev hole (not-present entry) 651 * @root: tree root 652 * @index: index key 653 * @max_scan: maximum range to search 654 * 655 * Search backwards in the range [max(index-max_scan+1, 0), index] 656 * for the first hole. 657 * 658 * Returns: the index of the hole if found, otherwise returns an index 659 * outside of the set specified (in which case 'index - return >= max_scan' 660 * will be true). In rare cases of wrap-around, LONG_MAX will be returned. 661 * 662 * radix_tree_next_hole may be called under rcu_read_lock. However, like 663 * radix_tree_gang_lookup, this will not atomically search a snapshot of 664 * the tree at a single point in time. For example, if a hole is created 665 * at index 10, then subsequently a hole is created at index 5, 666 * radix_tree_prev_hole covering both indexes may return 5 if called under 667 * rcu_read_lock. 668 */ 669 unsigned long radix_tree_prev_hole(struct radix_tree_root *root, 670 unsigned long index, unsigned long max_scan) 671 { 672 unsigned long i; 673 674 for (i = 0; i < max_scan; i++) { 675 if (!radix_tree_lookup(root, index)) 676 break; 677 index--; 678 if (index == LONG_MAX) 679 break; 680 } 681 682 return index; 683 } 684 EXPORT_SYMBOL(radix_tree_prev_hole); 685 686 static unsigned int 687 __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, 688 unsigned int max_items, unsigned long *next_index) 689 { 690 unsigned int nr_found = 0; 691 unsigned int shift, height; 692 unsigned long i; 693 694 height = slot->height; 695 if (height == 0) 696 goto out; 697 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 698 699 for ( ; height > 1; height--) { 700 i = (index >> shift) & RADIX_TREE_MAP_MASK; 701 for (;;) { 702 if (slot->slots[i] != NULL) 703 break; 704 index &= ~((1UL << shift) - 1); 705 index += 1UL << shift; 706 if (index == 0) 707 goto out; /* 32-bit wraparound */ 708 i++; 709 if (i == RADIX_TREE_MAP_SIZE) 710 goto out; 711 } 712 713 shift -= RADIX_TREE_MAP_SHIFT; 714 slot = rcu_dereference(slot->slots[i]); 715 if (slot == NULL) 716 goto out; 717 } 718 719 /* Bottom level: grab some items */ 720 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 721 index++; 722 if (slot->slots[i]) { 723 results[nr_found++] = &(slot->slots[i]); 724 if (nr_found == max_items) 725 goto out; 726 } 727 } 728 out: 729 *next_index = index; 730 return nr_found; 731 } 732 733 /** 734 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 735 * @root: radix tree root 736 * @results: where the results of the lookup are placed 737 * @first_index: start the lookup from this key 738 * @max_items: place up to this many items at *results 739 * 740 * Performs an index-ascending scan of the tree for present items. Places 741 * them at *@results and returns the number of items which were placed at 742 * *@results. 743 * 744 * The implementation is naive. 745 * 746 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 747 * rcu_read_lock. In this case, rather than the returned results being 748 * an atomic snapshot of the tree at a single point in time, the semantics 749 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 750 * have been issued in individual locks, and results stored in 'results'. 751 */ 752 unsigned int 753 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 754 unsigned long first_index, unsigned int max_items) 755 { 756 unsigned long max_index; 757 struct radix_tree_node *node; 758 unsigned long cur_index = first_index; 759 unsigned int ret; 760 761 node = rcu_dereference(root->rnode); 762 if (!node) 763 return 0; 764 765 if (!radix_tree_is_indirect_ptr(node)) { 766 if (first_index > 0) 767 return 0; 768 results[0] = node; 769 return 1; 770 } 771 node = radix_tree_indirect_to_ptr(node); 772 773 max_index = radix_tree_maxindex(node->height); 774 775 ret = 0; 776 while (ret < max_items) { 777 unsigned int nr_found, slots_found, i; 778 unsigned long next_index; /* Index of next search */ 779 780 if (cur_index > max_index) 781 break; 782 slots_found = __lookup(node, (void ***)results + ret, cur_index, 783 max_items - ret, &next_index); 784 nr_found = 0; 785 for (i = 0; i < slots_found; i++) { 786 struct radix_tree_node *slot; 787 slot = *(((void ***)results)[ret + i]); 788 if (!slot) 789 continue; 790 results[ret + nr_found] = rcu_dereference(slot); 791 nr_found++; 792 } 793 ret += nr_found; 794 if (next_index == 0) 795 break; 796 cur_index = next_index; 797 } 798 799 return ret; 800 } 801 EXPORT_SYMBOL(radix_tree_gang_lookup); 802 803 /** 804 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree 805 * @root: radix tree root 806 * @results: where the results of the lookup are placed 807 * @first_index: start the lookup from this key 808 * @max_items: place up to this many items at *results 809 * 810 * Performs an index-ascending scan of the tree for present items. Places 811 * their slots at *@results and returns the number of items which were 812 * placed at *@results. 813 * 814 * The implementation is naive. 815 * 816 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must 817 * be dereferenced with radix_tree_deref_slot, and if using only RCU 818 * protection, radix_tree_deref_slot may fail requiring a retry. 819 */ 820 unsigned int 821 radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 822 unsigned long first_index, unsigned int max_items) 823 { 824 unsigned long max_index; 825 struct radix_tree_node *node; 826 unsigned long cur_index = first_index; 827 unsigned int ret; 828 829 node = rcu_dereference(root->rnode); 830 if (!node) 831 return 0; 832 833 if (!radix_tree_is_indirect_ptr(node)) { 834 if (first_index > 0) 835 return 0; 836 results[0] = (void **)&root->rnode; 837 return 1; 838 } 839 node = radix_tree_indirect_to_ptr(node); 840 841 max_index = radix_tree_maxindex(node->height); 842 843 ret = 0; 844 while (ret < max_items) { 845 unsigned int slots_found; 846 unsigned long next_index; /* Index of next search */ 847 848 if (cur_index > max_index) 849 break; 850 slots_found = __lookup(node, results + ret, cur_index, 851 max_items - ret, &next_index); 852 ret += slots_found; 853 if (next_index == 0) 854 break; 855 cur_index = next_index; 856 } 857 858 return ret; 859 } 860 EXPORT_SYMBOL(radix_tree_gang_lookup_slot); 861 862 /* 863 * FIXME: the two tag_get()s here should use find_next_bit() instead of 864 * open-coding the search. 865 */ 866 static unsigned int 867 __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, 868 unsigned int max_items, unsigned long *next_index, unsigned int tag) 869 { 870 unsigned int nr_found = 0; 871 unsigned int shift, height; 872 873 height = slot->height; 874 if (height == 0) 875 goto out; 876 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 877 878 while (height > 0) { 879 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 880 881 for (;;) { 882 if (tag_get(slot, tag, i)) 883 break; 884 index &= ~((1UL << shift) - 1); 885 index += 1UL << shift; 886 if (index == 0) 887 goto out; /* 32-bit wraparound */ 888 i++; 889 if (i == RADIX_TREE_MAP_SIZE) 890 goto out; 891 } 892 height--; 893 if (height == 0) { /* Bottom level: grab some items */ 894 unsigned long j = index & RADIX_TREE_MAP_MASK; 895 896 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 897 index++; 898 if (!tag_get(slot, tag, j)) 899 continue; 900 /* 901 * Even though the tag was found set, we need to 902 * recheck that we have a non-NULL node, because 903 * if this lookup is lockless, it may have been 904 * subsequently deleted. 905 * 906 * Similar care must be taken in any place that 907 * lookup ->slots[x] without a lock (ie. can't 908 * rely on its value remaining the same). 909 */ 910 if (slot->slots[j]) { 911 results[nr_found++] = &(slot->slots[j]); 912 if (nr_found == max_items) 913 goto out; 914 } 915 } 916 } 917 shift -= RADIX_TREE_MAP_SHIFT; 918 slot = rcu_dereference(slot->slots[i]); 919 if (slot == NULL) 920 break; 921 } 922 out: 923 *next_index = index; 924 return nr_found; 925 } 926 927 /** 928 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 929 * based on a tag 930 * @root: radix tree root 931 * @results: where the results of the lookup are placed 932 * @first_index: start the lookup from this key 933 * @max_items: place up to this many items at *results 934 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 935 * 936 * Performs an index-ascending scan of the tree for present items which 937 * have the tag indexed by @tag set. Places the items at *@results and 938 * returns the number of items which were placed at *@results. 939 */ 940 unsigned int 941 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 942 unsigned long first_index, unsigned int max_items, 943 unsigned int tag) 944 { 945 struct radix_tree_node *node; 946 unsigned long max_index; 947 unsigned long cur_index = first_index; 948 unsigned int ret; 949 950 /* check the root's tag bit */ 951 if (!root_tag_get(root, tag)) 952 return 0; 953 954 node = rcu_dereference(root->rnode); 955 if (!node) 956 return 0; 957 958 if (!radix_tree_is_indirect_ptr(node)) { 959 if (first_index > 0) 960 return 0; 961 results[0] = node; 962 return 1; 963 } 964 node = radix_tree_indirect_to_ptr(node); 965 966 max_index = radix_tree_maxindex(node->height); 967 968 ret = 0; 969 while (ret < max_items) { 970 unsigned int nr_found, slots_found, i; 971 unsigned long next_index; /* Index of next search */ 972 973 if (cur_index > max_index) 974 break; 975 slots_found = __lookup_tag(node, (void ***)results + ret, 976 cur_index, max_items - ret, &next_index, tag); 977 nr_found = 0; 978 for (i = 0; i < slots_found; i++) { 979 struct radix_tree_node *slot; 980 slot = *(((void ***)results)[ret + i]); 981 if (!slot) 982 continue; 983 results[ret + nr_found] = rcu_dereference(slot); 984 nr_found++; 985 } 986 ret += nr_found; 987 if (next_index == 0) 988 break; 989 cur_index = next_index; 990 } 991 992 return ret; 993 } 994 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 995 996 /** 997 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 998 * radix tree based on a tag 999 * @root: radix tree root 1000 * @results: where the results of the lookup are placed 1001 * @first_index: start the lookup from this key 1002 * @max_items: place up to this many items at *results 1003 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1004 * 1005 * Performs an index-ascending scan of the tree for present items which 1006 * have the tag indexed by @tag set. Places the slots at *@results and 1007 * returns the number of slots which were placed at *@results. 1008 */ 1009 unsigned int 1010 radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, 1011 unsigned long first_index, unsigned int max_items, 1012 unsigned int tag) 1013 { 1014 struct radix_tree_node *node; 1015 unsigned long max_index; 1016 unsigned long cur_index = first_index; 1017 unsigned int ret; 1018 1019 /* check the root's tag bit */ 1020 if (!root_tag_get(root, tag)) 1021 return 0; 1022 1023 node = rcu_dereference(root->rnode); 1024 if (!node) 1025 return 0; 1026 1027 if (!radix_tree_is_indirect_ptr(node)) { 1028 if (first_index > 0) 1029 return 0; 1030 results[0] = (void **)&root->rnode; 1031 return 1; 1032 } 1033 node = radix_tree_indirect_to_ptr(node); 1034 1035 max_index = radix_tree_maxindex(node->height); 1036 1037 ret = 0; 1038 while (ret < max_items) { 1039 unsigned int slots_found; 1040 unsigned long next_index; /* Index of next search */ 1041 1042 if (cur_index > max_index) 1043 break; 1044 slots_found = __lookup_tag(node, results + ret, 1045 cur_index, max_items - ret, &next_index, tag); 1046 ret += slots_found; 1047 if (next_index == 0) 1048 break; 1049 cur_index = next_index; 1050 } 1051 1052 return ret; 1053 } 1054 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1055 1056 1057 /** 1058 * radix_tree_shrink - shrink height of a radix tree to minimal 1059 * @root radix tree root 1060 */ 1061 static inline void radix_tree_shrink(struct radix_tree_root *root) 1062 { 1063 /* try to shrink tree height */ 1064 while (root->height > 0) { 1065 struct radix_tree_node *to_free = root->rnode; 1066 void *newptr; 1067 1068 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1069 to_free = radix_tree_indirect_to_ptr(to_free); 1070 1071 /* 1072 * The candidate node has more than one child, or its child 1073 * is not at the leftmost slot, we cannot shrink. 1074 */ 1075 if (to_free->count != 1) 1076 break; 1077 if (!to_free->slots[0]) 1078 break; 1079 1080 /* 1081 * We don't need rcu_assign_pointer(), since we are simply 1082 * moving the node from one part of the tree to another. If 1083 * it was safe to dereference the old pointer to it 1084 * (to_free->slots[0]), it will be safe to dereference the new 1085 * one (root->rnode). 1086 */ 1087 newptr = to_free->slots[0]; 1088 if (root->height > 1) 1089 newptr = radix_tree_ptr_to_indirect(newptr); 1090 root->rnode = newptr; 1091 root->height--; 1092 radix_tree_node_free(to_free); 1093 } 1094 } 1095 1096 /** 1097 * radix_tree_delete - delete an item from a radix tree 1098 * @root: radix tree root 1099 * @index: index key 1100 * 1101 * Remove the item at @index from the radix tree rooted at @root. 1102 * 1103 * Returns the address of the deleted item, or NULL if it was not present. 1104 */ 1105 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1106 { 1107 /* 1108 * The radix tree path needs to be one longer than the maximum path 1109 * since the "list" is null terminated. 1110 */ 1111 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 1112 struct radix_tree_node *slot = NULL; 1113 struct radix_tree_node *to_free; 1114 unsigned int height, shift; 1115 int tag; 1116 int offset; 1117 1118 height = root->height; 1119 if (index > radix_tree_maxindex(height)) 1120 goto out; 1121 1122 slot = root->rnode; 1123 if (height == 0) { 1124 root_tag_clear_all(root); 1125 root->rnode = NULL; 1126 goto out; 1127 } 1128 slot = radix_tree_indirect_to_ptr(slot); 1129 1130 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1131 pathp->node = NULL; 1132 1133 do { 1134 if (slot == NULL) 1135 goto out; 1136 1137 pathp++; 1138 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 1139 pathp->offset = offset; 1140 pathp->node = slot; 1141 slot = slot->slots[offset]; 1142 shift -= RADIX_TREE_MAP_SHIFT; 1143 height--; 1144 } while (height > 0); 1145 1146 if (slot == NULL) 1147 goto out; 1148 1149 /* 1150 * Clear all tags associated with the just-deleted item 1151 */ 1152 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 1153 if (tag_get(pathp->node, tag, pathp->offset)) 1154 radix_tree_tag_clear(root, index, tag); 1155 } 1156 1157 to_free = NULL; 1158 /* Now free the nodes we do not need anymore */ 1159 while (pathp->node) { 1160 pathp->node->slots[pathp->offset] = NULL; 1161 pathp->node->count--; 1162 /* 1163 * Queue the node for deferred freeing after the 1164 * last reference to it disappears (set NULL, above). 1165 */ 1166 if (to_free) 1167 radix_tree_node_free(to_free); 1168 1169 if (pathp->node->count) { 1170 if (pathp->node == 1171 radix_tree_indirect_to_ptr(root->rnode)) 1172 radix_tree_shrink(root); 1173 goto out; 1174 } 1175 1176 /* Node with zero slots in use so free it */ 1177 to_free = pathp->node; 1178 pathp--; 1179 1180 } 1181 root_tag_clear_all(root); 1182 root->height = 0; 1183 root->rnode = NULL; 1184 if (to_free) 1185 radix_tree_node_free(to_free); 1186 1187 out: 1188 return slot; 1189 } 1190 EXPORT_SYMBOL(radix_tree_delete); 1191 1192 /** 1193 * radix_tree_tagged - test whether any items in the tree are tagged 1194 * @root: radix tree root 1195 * @tag: tag to test 1196 */ 1197 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 1198 { 1199 return root_tag_get(root, tag); 1200 } 1201 EXPORT_SYMBOL(radix_tree_tagged); 1202 1203 static void 1204 radix_tree_node_ctor(void *node) 1205 { 1206 memset(node, 0, sizeof(struct radix_tree_node)); 1207 } 1208 1209 static __init unsigned long __maxindex(unsigned int height) 1210 { 1211 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 1212 int shift = RADIX_TREE_INDEX_BITS - width; 1213 1214 if (shift < 0) 1215 return ~0UL; 1216 if (shift >= BITS_PER_LONG) 1217 return 0UL; 1218 return ~0UL >> shift; 1219 } 1220 1221 static __init void radix_tree_init_maxindex(void) 1222 { 1223 unsigned int i; 1224 1225 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 1226 height_to_maxindex[i] = __maxindex(i); 1227 } 1228 1229 static int radix_tree_callback(struct notifier_block *nfb, 1230 unsigned long action, 1231 void *hcpu) 1232 { 1233 int cpu = (long)hcpu; 1234 struct radix_tree_preload *rtp; 1235 1236 /* Free per-cpu pool of perloaded nodes */ 1237 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1238 rtp = &per_cpu(radix_tree_preloads, cpu); 1239 while (rtp->nr) { 1240 kmem_cache_free(radix_tree_node_cachep, 1241 rtp->nodes[rtp->nr-1]); 1242 rtp->nodes[rtp->nr-1] = NULL; 1243 rtp->nr--; 1244 } 1245 } 1246 return NOTIFY_OK; 1247 } 1248 1249 void __init radix_tree_init(void) 1250 { 1251 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1252 sizeof(struct radix_tree_node), 0, 1253 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1254 radix_tree_node_ctor); 1255 radix_tree_init_maxindex(); 1256 hotcpu_notifier(radix_tree_callback, 0); 1257 } 1258