1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 64 RADIX_TREE_MAP_SHIFT)) 65 66 /* 67 * The height_to_maxindex array needs to be one deeper than the maximum 68 * path as height 0 holds only 1 entry. 69 */ 70 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; 71 72 /* 73 * Radix tree node cache. 74 */ 75 static struct kmem_cache *radix_tree_node_cachep; 76 77 /* 78 * Per-cpu pool of preloaded nodes 79 */ 80 struct radix_tree_preload { 81 int nr; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 83 }; 84 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 85 86 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 87 { 88 return root->gfp_mask & __GFP_BITS_MASK; 89 } 90 91 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 92 int offset) 93 { 94 __set_bit(offset, node->tags[tag]); 95 } 96 97 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 98 int offset) 99 { 100 __clear_bit(offset, node->tags[tag]); 101 } 102 103 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 104 int offset) 105 { 106 return test_bit(offset, node->tags[tag]); 107 } 108 109 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 110 { 111 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 112 } 113 114 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 115 { 116 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 117 } 118 119 static inline void root_tag_clear_all(struct radix_tree_root *root) 120 { 121 root->gfp_mask &= __GFP_BITS_MASK; 122 } 123 124 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 125 { 126 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 127 } 128 129 /* 130 * Returns 1 if any slot in the node has this tag set. 131 * Otherwise returns 0. 132 */ 133 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 134 { 135 int idx; 136 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 137 if (node->tags[tag][idx]) 138 return 1; 139 } 140 return 0; 141 } 142 /* 143 * This assumes that the caller has performed appropriate preallocation, and 144 * that the caller has pinned this thread of control to the current CPU. 145 */ 146 static struct radix_tree_node * 147 radix_tree_node_alloc(struct radix_tree_root *root) 148 { 149 struct radix_tree_node *ret = NULL; 150 gfp_t gfp_mask = root_gfp_mask(root); 151 152 if (!(gfp_mask & __GFP_WAIT)) { 153 struct radix_tree_preload *rtp; 154 155 /* 156 * Provided the caller has preloaded here, we will always 157 * succeed in getting a node here (and never reach 158 * kmem_cache_alloc) 159 */ 160 rtp = &__get_cpu_var(radix_tree_preloads); 161 if (rtp->nr) { 162 ret = rtp->nodes[rtp->nr - 1]; 163 rtp->nodes[rtp->nr - 1] = NULL; 164 rtp->nr--; 165 } 166 } 167 if (ret == NULL) 168 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 169 170 BUG_ON(radix_tree_is_indirect_ptr(ret)); 171 return ret; 172 } 173 174 static void radix_tree_node_rcu_free(struct rcu_head *head) 175 { 176 struct radix_tree_node *node = 177 container_of(head, struct radix_tree_node, rcu_head); 178 179 /* 180 * must only free zeroed nodes into the slab. radix_tree_shrink 181 * can leave us with a non-NULL entry in the first slot, so clear 182 * that here to make sure. 183 */ 184 tag_clear(node, 0, 0); 185 tag_clear(node, 1, 0); 186 node->slots[0] = NULL; 187 node->count = 0; 188 189 kmem_cache_free(radix_tree_node_cachep, node); 190 } 191 192 static inline void 193 radix_tree_node_free(struct radix_tree_node *node) 194 { 195 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 196 } 197 198 /* 199 * Load up this CPU's radix_tree_node buffer with sufficient objects to 200 * ensure that the addition of a single element in the tree cannot fail. On 201 * success, return zero, with preemption disabled. On error, return -ENOMEM 202 * with preemption not disabled. 203 */ 204 int radix_tree_preload(gfp_t gfp_mask) 205 { 206 struct radix_tree_preload *rtp; 207 struct radix_tree_node *node; 208 int ret = -ENOMEM; 209 210 preempt_disable(); 211 rtp = &__get_cpu_var(radix_tree_preloads); 212 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 213 preempt_enable(); 214 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 215 if (node == NULL) 216 goto out; 217 preempt_disable(); 218 rtp = &__get_cpu_var(radix_tree_preloads); 219 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 220 rtp->nodes[rtp->nr++] = node; 221 else 222 kmem_cache_free(radix_tree_node_cachep, node); 223 } 224 ret = 0; 225 out: 226 return ret; 227 } 228 EXPORT_SYMBOL(radix_tree_preload); 229 230 /* 231 * Return the maximum key which can be store into a 232 * radix tree with height HEIGHT. 233 */ 234 static inline unsigned long radix_tree_maxindex(unsigned int height) 235 { 236 return height_to_maxindex[height]; 237 } 238 239 /* 240 * Extend a radix tree so it can store key @index. 241 */ 242 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 243 { 244 struct radix_tree_node *node; 245 unsigned int height; 246 int tag; 247 248 /* Figure out what the height should be. */ 249 height = root->height + 1; 250 while (index > radix_tree_maxindex(height)) 251 height++; 252 253 if (root->rnode == NULL) { 254 root->height = height; 255 goto out; 256 } 257 258 do { 259 unsigned int newheight; 260 if (!(node = radix_tree_node_alloc(root))) 261 return -ENOMEM; 262 263 /* Increase the height. */ 264 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 265 266 /* Propagate the aggregated tag info into the new root */ 267 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 268 if (root_tag_get(root, tag)) 269 tag_set(node, tag, 0); 270 } 271 272 newheight = root->height+1; 273 node->height = newheight; 274 node->count = 1; 275 node = radix_tree_ptr_to_indirect(node); 276 rcu_assign_pointer(root->rnode, node); 277 root->height = newheight; 278 } while (height > root->height); 279 out: 280 return 0; 281 } 282 283 /** 284 * radix_tree_insert - insert into a radix tree 285 * @root: radix tree root 286 * @index: index key 287 * @item: item to insert 288 * 289 * Insert an item into the radix tree at position @index. 290 */ 291 int radix_tree_insert(struct radix_tree_root *root, 292 unsigned long index, void *item) 293 { 294 struct radix_tree_node *node = NULL, *slot; 295 unsigned int height, shift; 296 int offset; 297 int error; 298 299 BUG_ON(radix_tree_is_indirect_ptr(item)); 300 301 /* Make sure the tree is high enough. */ 302 if (index > radix_tree_maxindex(root->height)) { 303 error = radix_tree_extend(root, index); 304 if (error) 305 return error; 306 } 307 308 slot = radix_tree_indirect_to_ptr(root->rnode); 309 310 height = root->height; 311 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 312 313 offset = 0; /* uninitialised var warning */ 314 while (height > 0) { 315 if (slot == NULL) { 316 /* Have to add a child node. */ 317 if (!(slot = radix_tree_node_alloc(root))) 318 return -ENOMEM; 319 slot->height = height; 320 if (node) { 321 rcu_assign_pointer(node->slots[offset], slot); 322 node->count++; 323 } else 324 rcu_assign_pointer(root->rnode, 325 radix_tree_ptr_to_indirect(slot)); 326 } 327 328 /* Go a level down */ 329 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 330 node = slot; 331 slot = node->slots[offset]; 332 shift -= RADIX_TREE_MAP_SHIFT; 333 height--; 334 } 335 336 if (slot != NULL) 337 return -EEXIST; 338 339 if (node) { 340 node->count++; 341 rcu_assign_pointer(node->slots[offset], item); 342 BUG_ON(tag_get(node, 0, offset)); 343 BUG_ON(tag_get(node, 1, offset)); 344 } else { 345 rcu_assign_pointer(root->rnode, item); 346 BUG_ON(root_tag_get(root, 0)); 347 BUG_ON(root_tag_get(root, 1)); 348 } 349 350 return 0; 351 } 352 EXPORT_SYMBOL(radix_tree_insert); 353 354 /* 355 * is_slot == 1 : search for the slot. 356 * is_slot == 0 : search for the node. 357 */ 358 static void *radix_tree_lookup_element(struct radix_tree_root *root, 359 unsigned long index, int is_slot) 360 { 361 unsigned int height, shift; 362 struct radix_tree_node *node, **slot; 363 364 node = rcu_dereference(root->rnode); 365 if (node == NULL) 366 return NULL; 367 368 if (!radix_tree_is_indirect_ptr(node)) { 369 if (index > 0) 370 return NULL; 371 return is_slot ? (void *)&root->rnode : node; 372 } 373 node = radix_tree_indirect_to_ptr(node); 374 375 height = node->height; 376 if (index > radix_tree_maxindex(height)) 377 return NULL; 378 379 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 380 381 do { 382 slot = (struct radix_tree_node **) 383 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 384 node = rcu_dereference(*slot); 385 if (node == NULL) 386 return NULL; 387 388 shift -= RADIX_TREE_MAP_SHIFT; 389 height--; 390 } while (height > 0); 391 392 return is_slot ? (void *)slot:node; 393 } 394 395 /** 396 * radix_tree_lookup_slot - lookup a slot in a radix tree 397 * @root: radix tree root 398 * @index: index key 399 * 400 * Returns: the slot corresponding to the position @index in the 401 * radix tree @root. This is useful for update-if-exists operations. 402 * 403 * This function can be called under rcu_read_lock iff the slot is not 404 * modified by radix_tree_replace_slot, otherwise it must be called 405 * exclusive from other writers. Any dereference of the slot must be done 406 * using radix_tree_deref_slot. 407 */ 408 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 409 { 410 return (void **)radix_tree_lookup_element(root, index, 1); 411 } 412 EXPORT_SYMBOL(radix_tree_lookup_slot); 413 414 /** 415 * radix_tree_lookup - perform lookup operation on a radix tree 416 * @root: radix tree root 417 * @index: index key 418 * 419 * Lookup the item at the position @index in the radix tree @root. 420 * 421 * This function can be called under rcu_read_lock, however the caller 422 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 423 * them safely). No RCU barriers are required to access or modify the 424 * returned item, however. 425 */ 426 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 427 { 428 return radix_tree_lookup_element(root, index, 0); 429 } 430 EXPORT_SYMBOL(radix_tree_lookup); 431 432 /** 433 * radix_tree_tag_set - set a tag on a radix tree node 434 * @root: radix tree root 435 * @index: index key 436 * @tag: tag index 437 * 438 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 439 * corresponding to @index in the radix tree. From 440 * the root all the way down to the leaf node. 441 * 442 * Returns the address of the tagged item. Setting a tag on a not-present 443 * item is a bug. 444 */ 445 void *radix_tree_tag_set(struct radix_tree_root *root, 446 unsigned long index, unsigned int tag) 447 { 448 unsigned int height, shift; 449 struct radix_tree_node *slot; 450 451 height = root->height; 452 BUG_ON(index > radix_tree_maxindex(height)); 453 454 slot = radix_tree_indirect_to_ptr(root->rnode); 455 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 456 457 while (height > 0) { 458 int offset; 459 460 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 461 if (!tag_get(slot, tag, offset)) 462 tag_set(slot, tag, offset); 463 slot = slot->slots[offset]; 464 BUG_ON(slot == NULL); 465 shift -= RADIX_TREE_MAP_SHIFT; 466 height--; 467 } 468 469 /* set the root's tag bit */ 470 if (slot && !root_tag_get(root, tag)) 471 root_tag_set(root, tag); 472 473 return slot; 474 } 475 EXPORT_SYMBOL(radix_tree_tag_set); 476 477 /** 478 * radix_tree_tag_clear - clear a tag on a radix tree node 479 * @root: radix tree root 480 * @index: index key 481 * @tag: tag index 482 * 483 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 484 * corresponding to @index in the radix tree. If 485 * this causes the leaf node to have no tags set then clear the tag in the 486 * next-to-leaf node, etc. 487 * 488 * Returns the address of the tagged item on success, else NULL. ie: 489 * has the same return value and semantics as radix_tree_lookup(). 490 */ 491 void *radix_tree_tag_clear(struct radix_tree_root *root, 492 unsigned long index, unsigned int tag) 493 { 494 /* 495 * The radix tree path needs to be one longer than the maximum path 496 * since the "list" is null terminated. 497 */ 498 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 499 struct radix_tree_node *slot = NULL; 500 unsigned int height, shift; 501 502 height = root->height; 503 if (index > radix_tree_maxindex(height)) 504 goto out; 505 506 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 507 pathp->node = NULL; 508 slot = radix_tree_indirect_to_ptr(root->rnode); 509 510 while (height > 0) { 511 int offset; 512 513 if (slot == NULL) 514 goto out; 515 516 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 517 pathp[1].offset = offset; 518 pathp[1].node = slot; 519 slot = slot->slots[offset]; 520 pathp++; 521 shift -= RADIX_TREE_MAP_SHIFT; 522 height--; 523 } 524 525 if (slot == NULL) 526 goto out; 527 528 while (pathp->node) { 529 if (!tag_get(pathp->node, tag, pathp->offset)) 530 goto out; 531 tag_clear(pathp->node, tag, pathp->offset); 532 if (any_tag_set(pathp->node, tag)) 533 goto out; 534 pathp--; 535 } 536 537 /* clear the root's tag bit */ 538 if (root_tag_get(root, tag)) 539 root_tag_clear(root, tag); 540 541 out: 542 return slot; 543 } 544 EXPORT_SYMBOL(radix_tree_tag_clear); 545 546 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 547 /** 548 * radix_tree_tag_get - get a tag on a radix tree node 549 * @root: radix tree root 550 * @index: index key 551 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 552 * 553 * Return values: 554 * 555 * 0: tag not present or not set 556 * 1: tag set 557 */ 558 int radix_tree_tag_get(struct radix_tree_root *root, 559 unsigned long index, unsigned int tag) 560 { 561 unsigned int height, shift; 562 struct radix_tree_node *node; 563 int saw_unset_tag = 0; 564 565 /* check the root's tag bit */ 566 if (!root_tag_get(root, tag)) 567 return 0; 568 569 node = rcu_dereference(root->rnode); 570 if (node == NULL) 571 return 0; 572 573 if (!radix_tree_is_indirect_ptr(node)) 574 return (index == 0); 575 node = radix_tree_indirect_to_ptr(node); 576 577 height = node->height; 578 if (index > radix_tree_maxindex(height)) 579 return 0; 580 581 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 582 583 for ( ; ; ) { 584 int offset; 585 586 if (node == NULL) 587 return 0; 588 589 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 590 591 /* 592 * This is just a debug check. Later, we can bale as soon as 593 * we see an unset tag. 594 */ 595 if (!tag_get(node, tag, offset)) 596 saw_unset_tag = 1; 597 if (height == 1) { 598 int ret = tag_get(node, tag, offset); 599 600 BUG_ON(ret && saw_unset_tag); 601 return !!ret; 602 } 603 node = rcu_dereference(node->slots[offset]); 604 shift -= RADIX_TREE_MAP_SHIFT; 605 height--; 606 } 607 } 608 EXPORT_SYMBOL(radix_tree_tag_get); 609 #endif 610 611 /** 612 * radix_tree_next_hole - find the next hole (not-present entry) 613 * @root: tree root 614 * @index: index key 615 * @max_scan: maximum range to search 616 * 617 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest 618 * indexed hole. 619 * 620 * Returns: the index of the hole if found, otherwise returns an index 621 * outside of the set specified (in which case 'return - index >= max_scan' 622 * will be true). In rare cases of index wrap-around, 0 will be returned. 623 * 624 * radix_tree_next_hole may be called under rcu_read_lock. However, like 625 * radix_tree_gang_lookup, this will not atomically search a snapshot of 626 * the tree at a single point in time. For example, if a hole is created 627 * at index 5, then subsequently a hole is created at index 10, 628 * radix_tree_next_hole covering both indexes may return 10 if called 629 * under rcu_read_lock. 630 */ 631 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 632 unsigned long index, unsigned long max_scan) 633 { 634 unsigned long i; 635 636 for (i = 0; i < max_scan; i++) { 637 if (!radix_tree_lookup(root, index)) 638 break; 639 index++; 640 if (index == 0) 641 break; 642 } 643 644 return index; 645 } 646 EXPORT_SYMBOL(radix_tree_next_hole); 647 648 /** 649 * radix_tree_prev_hole - find the prev hole (not-present entry) 650 * @root: tree root 651 * @index: index key 652 * @max_scan: maximum range to search 653 * 654 * Search backwards in the range [max(index-max_scan+1, 0), index] 655 * for the first hole. 656 * 657 * Returns: the index of the hole if found, otherwise returns an index 658 * outside of the set specified (in which case 'index - return >= max_scan' 659 * will be true). In rare cases of wrap-around, LONG_MAX will be returned. 660 * 661 * radix_tree_next_hole may be called under rcu_read_lock. However, like 662 * radix_tree_gang_lookup, this will not atomically search a snapshot of 663 * the tree at a single point in time. For example, if a hole is created 664 * at index 10, then subsequently a hole is created at index 5, 665 * radix_tree_prev_hole covering both indexes may return 5 if called under 666 * rcu_read_lock. 667 */ 668 unsigned long radix_tree_prev_hole(struct radix_tree_root *root, 669 unsigned long index, unsigned long max_scan) 670 { 671 unsigned long i; 672 673 for (i = 0; i < max_scan; i++) { 674 if (!radix_tree_lookup(root, index)) 675 break; 676 index--; 677 if (index == LONG_MAX) 678 break; 679 } 680 681 return index; 682 } 683 EXPORT_SYMBOL(radix_tree_prev_hole); 684 685 static unsigned int 686 __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, 687 unsigned int max_items, unsigned long *next_index) 688 { 689 unsigned int nr_found = 0; 690 unsigned int shift, height; 691 unsigned long i; 692 693 height = slot->height; 694 if (height == 0) 695 goto out; 696 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 697 698 for ( ; height > 1; height--) { 699 i = (index >> shift) & RADIX_TREE_MAP_MASK; 700 for (;;) { 701 if (slot->slots[i] != NULL) 702 break; 703 index &= ~((1UL << shift) - 1); 704 index += 1UL << shift; 705 if (index == 0) 706 goto out; /* 32-bit wraparound */ 707 i++; 708 if (i == RADIX_TREE_MAP_SIZE) 709 goto out; 710 } 711 712 shift -= RADIX_TREE_MAP_SHIFT; 713 slot = rcu_dereference(slot->slots[i]); 714 if (slot == NULL) 715 goto out; 716 } 717 718 /* Bottom level: grab some items */ 719 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 720 index++; 721 if (slot->slots[i]) { 722 results[nr_found++] = &(slot->slots[i]); 723 if (nr_found == max_items) 724 goto out; 725 } 726 } 727 out: 728 *next_index = index; 729 return nr_found; 730 } 731 732 /** 733 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 734 * @root: radix tree root 735 * @results: where the results of the lookup are placed 736 * @first_index: start the lookup from this key 737 * @max_items: place up to this many items at *results 738 * 739 * Performs an index-ascending scan of the tree for present items. Places 740 * them at *@results and returns the number of items which were placed at 741 * *@results. 742 * 743 * The implementation is naive. 744 * 745 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 746 * rcu_read_lock. In this case, rather than the returned results being 747 * an atomic snapshot of the tree at a single point in time, the semantics 748 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 749 * have been issued in individual locks, and results stored in 'results'. 750 */ 751 unsigned int 752 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 753 unsigned long first_index, unsigned int max_items) 754 { 755 unsigned long max_index; 756 struct radix_tree_node *node; 757 unsigned long cur_index = first_index; 758 unsigned int ret; 759 760 node = rcu_dereference(root->rnode); 761 if (!node) 762 return 0; 763 764 if (!radix_tree_is_indirect_ptr(node)) { 765 if (first_index > 0) 766 return 0; 767 results[0] = node; 768 return 1; 769 } 770 node = radix_tree_indirect_to_ptr(node); 771 772 max_index = radix_tree_maxindex(node->height); 773 774 ret = 0; 775 while (ret < max_items) { 776 unsigned int nr_found, slots_found, i; 777 unsigned long next_index; /* Index of next search */ 778 779 if (cur_index > max_index) 780 break; 781 slots_found = __lookup(node, (void ***)results + ret, cur_index, 782 max_items - ret, &next_index); 783 nr_found = 0; 784 for (i = 0; i < slots_found; i++) { 785 struct radix_tree_node *slot; 786 slot = *(((void ***)results)[ret + i]); 787 if (!slot) 788 continue; 789 results[ret + nr_found] = rcu_dereference(slot); 790 nr_found++; 791 } 792 ret += nr_found; 793 if (next_index == 0) 794 break; 795 cur_index = next_index; 796 } 797 798 return ret; 799 } 800 EXPORT_SYMBOL(radix_tree_gang_lookup); 801 802 /** 803 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree 804 * @root: radix tree root 805 * @results: where the results of the lookup are placed 806 * @first_index: start the lookup from this key 807 * @max_items: place up to this many items at *results 808 * 809 * Performs an index-ascending scan of the tree for present items. Places 810 * their slots at *@results and returns the number of items which were 811 * placed at *@results. 812 * 813 * The implementation is naive. 814 * 815 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must 816 * be dereferenced with radix_tree_deref_slot, and if using only RCU 817 * protection, radix_tree_deref_slot may fail requiring a retry. 818 */ 819 unsigned int 820 radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 821 unsigned long first_index, unsigned int max_items) 822 { 823 unsigned long max_index; 824 struct radix_tree_node *node; 825 unsigned long cur_index = first_index; 826 unsigned int ret; 827 828 node = rcu_dereference(root->rnode); 829 if (!node) 830 return 0; 831 832 if (!radix_tree_is_indirect_ptr(node)) { 833 if (first_index > 0) 834 return 0; 835 results[0] = (void **)&root->rnode; 836 return 1; 837 } 838 node = radix_tree_indirect_to_ptr(node); 839 840 max_index = radix_tree_maxindex(node->height); 841 842 ret = 0; 843 while (ret < max_items) { 844 unsigned int slots_found; 845 unsigned long next_index; /* Index of next search */ 846 847 if (cur_index > max_index) 848 break; 849 slots_found = __lookup(node, results + ret, cur_index, 850 max_items - ret, &next_index); 851 ret += slots_found; 852 if (next_index == 0) 853 break; 854 cur_index = next_index; 855 } 856 857 return ret; 858 } 859 EXPORT_SYMBOL(radix_tree_gang_lookup_slot); 860 861 /* 862 * FIXME: the two tag_get()s here should use find_next_bit() instead of 863 * open-coding the search. 864 */ 865 static unsigned int 866 __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, 867 unsigned int max_items, unsigned long *next_index, unsigned int tag) 868 { 869 unsigned int nr_found = 0; 870 unsigned int shift, height; 871 872 height = slot->height; 873 if (height == 0) 874 goto out; 875 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 876 877 while (height > 0) { 878 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 879 880 for (;;) { 881 if (tag_get(slot, tag, i)) 882 break; 883 index &= ~((1UL << shift) - 1); 884 index += 1UL << shift; 885 if (index == 0) 886 goto out; /* 32-bit wraparound */ 887 i++; 888 if (i == RADIX_TREE_MAP_SIZE) 889 goto out; 890 } 891 height--; 892 if (height == 0) { /* Bottom level: grab some items */ 893 unsigned long j = index & RADIX_TREE_MAP_MASK; 894 895 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 896 index++; 897 if (!tag_get(slot, tag, j)) 898 continue; 899 /* 900 * Even though the tag was found set, we need to 901 * recheck that we have a non-NULL node, because 902 * if this lookup is lockless, it may have been 903 * subsequently deleted. 904 * 905 * Similar care must be taken in any place that 906 * lookup ->slots[x] without a lock (ie. can't 907 * rely on its value remaining the same). 908 */ 909 if (slot->slots[j]) { 910 results[nr_found++] = &(slot->slots[j]); 911 if (nr_found == max_items) 912 goto out; 913 } 914 } 915 } 916 shift -= RADIX_TREE_MAP_SHIFT; 917 slot = rcu_dereference(slot->slots[i]); 918 if (slot == NULL) 919 break; 920 } 921 out: 922 *next_index = index; 923 return nr_found; 924 } 925 926 /** 927 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 928 * based on a tag 929 * @root: radix tree root 930 * @results: where the results of the lookup are placed 931 * @first_index: start the lookup from this key 932 * @max_items: place up to this many items at *results 933 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 934 * 935 * Performs an index-ascending scan of the tree for present items which 936 * have the tag indexed by @tag set. Places the items at *@results and 937 * returns the number of items which were placed at *@results. 938 */ 939 unsigned int 940 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 941 unsigned long first_index, unsigned int max_items, 942 unsigned int tag) 943 { 944 struct radix_tree_node *node; 945 unsigned long max_index; 946 unsigned long cur_index = first_index; 947 unsigned int ret; 948 949 /* check the root's tag bit */ 950 if (!root_tag_get(root, tag)) 951 return 0; 952 953 node = rcu_dereference(root->rnode); 954 if (!node) 955 return 0; 956 957 if (!radix_tree_is_indirect_ptr(node)) { 958 if (first_index > 0) 959 return 0; 960 results[0] = node; 961 return 1; 962 } 963 node = radix_tree_indirect_to_ptr(node); 964 965 max_index = radix_tree_maxindex(node->height); 966 967 ret = 0; 968 while (ret < max_items) { 969 unsigned int nr_found, slots_found, i; 970 unsigned long next_index; /* Index of next search */ 971 972 if (cur_index > max_index) 973 break; 974 slots_found = __lookup_tag(node, (void ***)results + ret, 975 cur_index, max_items - ret, &next_index, tag); 976 nr_found = 0; 977 for (i = 0; i < slots_found; i++) { 978 struct radix_tree_node *slot; 979 slot = *(((void ***)results)[ret + i]); 980 if (!slot) 981 continue; 982 results[ret + nr_found] = rcu_dereference(slot); 983 nr_found++; 984 } 985 ret += nr_found; 986 if (next_index == 0) 987 break; 988 cur_index = next_index; 989 } 990 991 return ret; 992 } 993 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 994 995 /** 996 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 997 * radix tree based on a tag 998 * @root: radix tree root 999 * @results: where the results of the lookup are placed 1000 * @first_index: start the lookup from this key 1001 * @max_items: place up to this many items at *results 1002 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1003 * 1004 * Performs an index-ascending scan of the tree for present items which 1005 * have the tag indexed by @tag set. Places the slots at *@results and 1006 * returns the number of slots which were placed at *@results. 1007 */ 1008 unsigned int 1009 radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, 1010 unsigned long first_index, unsigned int max_items, 1011 unsigned int tag) 1012 { 1013 struct radix_tree_node *node; 1014 unsigned long max_index; 1015 unsigned long cur_index = first_index; 1016 unsigned int ret; 1017 1018 /* check the root's tag bit */ 1019 if (!root_tag_get(root, tag)) 1020 return 0; 1021 1022 node = rcu_dereference(root->rnode); 1023 if (!node) 1024 return 0; 1025 1026 if (!radix_tree_is_indirect_ptr(node)) { 1027 if (first_index > 0) 1028 return 0; 1029 results[0] = (void **)&root->rnode; 1030 return 1; 1031 } 1032 node = radix_tree_indirect_to_ptr(node); 1033 1034 max_index = radix_tree_maxindex(node->height); 1035 1036 ret = 0; 1037 while (ret < max_items) { 1038 unsigned int slots_found; 1039 unsigned long next_index; /* Index of next search */ 1040 1041 if (cur_index > max_index) 1042 break; 1043 slots_found = __lookup_tag(node, results + ret, 1044 cur_index, max_items - ret, &next_index, tag); 1045 ret += slots_found; 1046 if (next_index == 0) 1047 break; 1048 cur_index = next_index; 1049 } 1050 1051 return ret; 1052 } 1053 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1054 1055 1056 /** 1057 * radix_tree_shrink - shrink height of a radix tree to minimal 1058 * @root radix tree root 1059 */ 1060 static inline void radix_tree_shrink(struct radix_tree_root *root) 1061 { 1062 /* try to shrink tree height */ 1063 while (root->height > 0) { 1064 struct radix_tree_node *to_free = root->rnode; 1065 void *newptr; 1066 1067 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1068 to_free = radix_tree_indirect_to_ptr(to_free); 1069 1070 /* 1071 * The candidate node has more than one child, or its child 1072 * is not at the leftmost slot, we cannot shrink. 1073 */ 1074 if (to_free->count != 1) 1075 break; 1076 if (!to_free->slots[0]) 1077 break; 1078 1079 /* 1080 * We don't need rcu_assign_pointer(), since we are simply 1081 * moving the node from one part of the tree to another. If 1082 * it was safe to dereference the old pointer to it 1083 * (to_free->slots[0]), it will be safe to dereference the new 1084 * one (root->rnode). 1085 */ 1086 newptr = to_free->slots[0]; 1087 if (root->height > 1) 1088 newptr = radix_tree_ptr_to_indirect(newptr); 1089 root->rnode = newptr; 1090 root->height--; 1091 radix_tree_node_free(to_free); 1092 } 1093 } 1094 1095 /** 1096 * radix_tree_delete - delete an item from a radix tree 1097 * @root: radix tree root 1098 * @index: index key 1099 * 1100 * Remove the item at @index from the radix tree rooted at @root. 1101 * 1102 * Returns the address of the deleted item, or NULL if it was not present. 1103 */ 1104 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1105 { 1106 /* 1107 * The radix tree path needs to be one longer than the maximum path 1108 * since the "list" is null terminated. 1109 */ 1110 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 1111 struct radix_tree_node *slot = NULL; 1112 struct radix_tree_node *to_free; 1113 unsigned int height, shift; 1114 int tag; 1115 int offset; 1116 1117 height = root->height; 1118 if (index > radix_tree_maxindex(height)) 1119 goto out; 1120 1121 slot = root->rnode; 1122 if (height == 0) { 1123 root_tag_clear_all(root); 1124 root->rnode = NULL; 1125 goto out; 1126 } 1127 slot = radix_tree_indirect_to_ptr(slot); 1128 1129 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1130 pathp->node = NULL; 1131 1132 do { 1133 if (slot == NULL) 1134 goto out; 1135 1136 pathp++; 1137 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 1138 pathp->offset = offset; 1139 pathp->node = slot; 1140 slot = slot->slots[offset]; 1141 shift -= RADIX_TREE_MAP_SHIFT; 1142 height--; 1143 } while (height > 0); 1144 1145 if (slot == NULL) 1146 goto out; 1147 1148 /* 1149 * Clear all tags associated with the just-deleted item 1150 */ 1151 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 1152 if (tag_get(pathp->node, tag, pathp->offset)) 1153 radix_tree_tag_clear(root, index, tag); 1154 } 1155 1156 to_free = NULL; 1157 /* Now free the nodes we do not need anymore */ 1158 while (pathp->node) { 1159 pathp->node->slots[pathp->offset] = NULL; 1160 pathp->node->count--; 1161 /* 1162 * Queue the node for deferred freeing after the 1163 * last reference to it disappears (set NULL, above). 1164 */ 1165 if (to_free) 1166 radix_tree_node_free(to_free); 1167 1168 if (pathp->node->count) { 1169 if (pathp->node == 1170 radix_tree_indirect_to_ptr(root->rnode)) 1171 radix_tree_shrink(root); 1172 goto out; 1173 } 1174 1175 /* Node with zero slots in use so free it */ 1176 to_free = pathp->node; 1177 pathp--; 1178 1179 } 1180 root_tag_clear_all(root); 1181 root->height = 0; 1182 root->rnode = NULL; 1183 if (to_free) 1184 radix_tree_node_free(to_free); 1185 1186 out: 1187 return slot; 1188 } 1189 EXPORT_SYMBOL(radix_tree_delete); 1190 1191 /** 1192 * radix_tree_tagged - test whether any items in the tree are tagged 1193 * @root: radix tree root 1194 * @tag: tag to test 1195 */ 1196 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 1197 { 1198 return root_tag_get(root, tag); 1199 } 1200 EXPORT_SYMBOL(radix_tree_tagged); 1201 1202 static void 1203 radix_tree_node_ctor(void *node) 1204 { 1205 memset(node, 0, sizeof(struct radix_tree_node)); 1206 } 1207 1208 static __init unsigned long __maxindex(unsigned int height) 1209 { 1210 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 1211 int shift = RADIX_TREE_INDEX_BITS - width; 1212 1213 if (shift < 0) 1214 return ~0UL; 1215 if (shift >= BITS_PER_LONG) 1216 return 0UL; 1217 return ~0UL >> shift; 1218 } 1219 1220 static __init void radix_tree_init_maxindex(void) 1221 { 1222 unsigned int i; 1223 1224 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 1225 height_to_maxindex[i] = __maxindex(i); 1226 } 1227 1228 static int radix_tree_callback(struct notifier_block *nfb, 1229 unsigned long action, 1230 void *hcpu) 1231 { 1232 int cpu = (long)hcpu; 1233 struct radix_tree_preload *rtp; 1234 1235 /* Free per-cpu pool of perloaded nodes */ 1236 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1237 rtp = &per_cpu(radix_tree_preloads, cpu); 1238 while (rtp->nr) { 1239 kmem_cache_free(radix_tree_node_cachep, 1240 rtp->nodes[rtp->nr-1]); 1241 rtp->nodes[rtp->nr-1] = NULL; 1242 rtp->nr--; 1243 } 1244 } 1245 return NOTIFY_OK; 1246 } 1247 1248 void __init radix_tree_init(void) 1249 { 1250 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1251 sizeof(struct radix_tree_node), 0, 1252 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1253 radix_tree_node_ctor); 1254 radix_tree_init_maxindex(); 1255 hotcpu_notifier(radix_tree_callback, 0); 1256 } 1257