1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 64 RADIX_TREE_MAP_SHIFT)) 65 66 /* 67 * The height_to_maxindex array needs to be one deeper than the maximum 68 * path as height 0 holds only 1 entry. 69 */ 70 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; 71 72 /* 73 * Radix tree node cache. 74 */ 75 static struct kmem_cache *radix_tree_node_cachep; 76 77 /* 78 * Per-cpu pool of preloaded nodes 79 */ 80 struct radix_tree_preload { 81 int nr; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 83 }; 84 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 85 86 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 87 { 88 return root->gfp_mask & __GFP_BITS_MASK; 89 } 90 91 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 92 int offset) 93 { 94 __set_bit(offset, node->tags[tag]); 95 } 96 97 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 98 int offset) 99 { 100 __clear_bit(offset, node->tags[tag]); 101 } 102 103 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 104 int offset) 105 { 106 return test_bit(offset, node->tags[tag]); 107 } 108 109 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 110 { 111 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 112 } 113 114 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 115 { 116 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 117 } 118 119 static inline void root_tag_clear_all(struct radix_tree_root *root) 120 { 121 root->gfp_mask &= __GFP_BITS_MASK; 122 } 123 124 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 125 { 126 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 127 } 128 129 /* 130 * Returns 1 if any slot in the node has this tag set. 131 * Otherwise returns 0. 132 */ 133 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 134 { 135 int idx; 136 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 137 if (node->tags[tag][idx]) 138 return 1; 139 } 140 return 0; 141 } 142 /* 143 * This assumes that the caller has performed appropriate preallocation, and 144 * that the caller has pinned this thread of control to the current CPU. 145 */ 146 static struct radix_tree_node * 147 radix_tree_node_alloc(struct radix_tree_root *root) 148 { 149 struct radix_tree_node *ret = NULL; 150 gfp_t gfp_mask = root_gfp_mask(root); 151 152 if (!(gfp_mask & __GFP_WAIT)) { 153 struct radix_tree_preload *rtp; 154 155 /* 156 * Provided the caller has preloaded here, we will always 157 * succeed in getting a node here (and never reach 158 * kmem_cache_alloc) 159 */ 160 rtp = &__get_cpu_var(radix_tree_preloads); 161 if (rtp->nr) { 162 ret = rtp->nodes[rtp->nr - 1]; 163 rtp->nodes[rtp->nr - 1] = NULL; 164 rtp->nr--; 165 } 166 } 167 if (ret == NULL) 168 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 169 170 BUG_ON(radix_tree_is_indirect_ptr(ret)); 171 return ret; 172 } 173 174 static void radix_tree_node_rcu_free(struct rcu_head *head) 175 { 176 struct radix_tree_node *node = 177 container_of(head, struct radix_tree_node, rcu_head); 178 179 /* 180 * must only free zeroed nodes into the slab. radix_tree_shrink 181 * can leave us with a non-NULL entry in the first slot, so clear 182 * that here to make sure. 183 */ 184 tag_clear(node, 0, 0); 185 tag_clear(node, 1, 0); 186 node->slots[0] = NULL; 187 node->count = 0; 188 189 kmem_cache_free(radix_tree_node_cachep, node); 190 } 191 192 static inline void 193 radix_tree_node_free(struct radix_tree_node *node) 194 { 195 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 196 } 197 198 /* 199 * Load up this CPU's radix_tree_node buffer with sufficient objects to 200 * ensure that the addition of a single element in the tree cannot fail. On 201 * success, return zero, with preemption disabled. On error, return -ENOMEM 202 * with preemption not disabled. 203 */ 204 int radix_tree_preload(gfp_t gfp_mask) 205 { 206 struct radix_tree_preload *rtp; 207 struct radix_tree_node *node; 208 int ret = -ENOMEM; 209 210 preempt_disable(); 211 rtp = &__get_cpu_var(radix_tree_preloads); 212 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 213 preempt_enable(); 214 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 215 if (node == NULL) 216 goto out; 217 preempt_disable(); 218 rtp = &__get_cpu_var(radix_tree_preloads); 219 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 220 rtp->nodes[rtp->nr++] = node; 221 else 222 kmem_cache_free(radix_tree_node_cachep, node); 223 } 224 ret = 0; 225 out: 226 return ret; 227 } 228 EXPORT_SYMBOL(radix_tree_preload); 229 230 /* 231 * Return the maximum key which can be store into a 232 * radix tree with height HEIGHT. 233 */ 234 static inline unsigned long radix_tree_maxindex(unsigned int height) 235 { 236 return height_to_maxindex[height]; 237 } 238 239 /* 240 * Extend a radix tree so it can store key @index. 241 */ 242 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 243 { 244 struct radix_tree_node *node; 245 unsigned int height; 246 int tag; 247 248 /* Figure out what the height should be. */ 249 height = root->height + 1; 250 while (index > radix_tree_maxindex(height)) 251 height++; 252 253 if (root->rnode == NULL) { 254 root->height = height; 255 goto out; 256 } 257 258 do { 259 unsigned int newheight; 260 if (!(node = radix_tree_node_alloc(root))) 261 return -ENOMEM; 262 263 /* Increase the height. */ 264 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 265 266 /* Propagate the aggregated tag info into the new root */ 267 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 268 if (root_tag_get(root, tag)) 269 tag_set(node, tag, 0); 270 } 271 272 newheight = root->height+1; 273 node->height = newheight; 274 node->count = 1; 275 node = radix_tree_ptr_to_indirect(node); 276 rcu_assign_pointer(root->rnode, node); 277 root->height = newheight; 278 } while (height > root->height); 279 out: 280 return 0; 281 } 282 283 /** 284 * radix_tree_insert - insert into a radix tree 285 * @root: radix tree root 286 * @index: index key 287 * @item: item to insert 288 * 289 * Insert an item into the radix tree at position @index. 290 */ 291 int radix_tree_insert(struct radix_tree_root *root, 292 unsigned long index, void *item) 293 { 294 struct radix_tree_node *node = NULL, *slot; 295 unsigned int height, shift; 296 int offset; 297 int error; 298 299 BUG_ON(radix_tree_is_indirect_ptr(item)); 300 301 /* Make sure the tree is high enough. */ 302 if (index > radix_tree_maxindex(root->height)) { 303 error = radix_tree_extend(root, index); 304 if (error) 305 return error; 306 } 307 308 slot = radix_tree_indirect_to_ptr(root->rnode); 309 310 height = root->height; 311 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 312 313 offset = 0; /* uninitialised var warning */ 314 while (height > 0) { 315 if (slot == NULL) { 316 /* Have to add a child node. */ 317 if (!(slot = radix_tree_node_alloc(root))) 318 return -ENOMEM; 319 slot->height = height; 320 if (node) { 321 rcu_assign_pointer(node->slots[offset], slot); 322 node->count++; 323 } else 324 rcu_assign_pointer(root->rnode, 325 radix_tree_ptr_to_indirect(slot)); 326 } 327 328 /* Go a level down */ 329 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 330 node = slot; 331 slot = node->slots[offset]; 332 shift -= RADIX_TREE_MAP_SHIFT; 333 height--; 334 } 335 336 if (slot != NULL) 337 return -EEXIST; 338 339 if (node) { 340 node->count++; 341 rcu_assign_pointer(node->slots[offset], item); 342 BUG_ON(tag_get(node, 0, offset)); 343 BUG_ON(tag_get(node, 1, offset)); 344 } else { 345 rcu_assign_pointer(root->rnode, item); 346 BUG_ON(root_tag_get(root, 0)); 347 BUG_ON(root_tag_get(root, 1)); 348 } 349 350 return 0; 351 } 352 EXPORT_SYMBOL(radix_tree_insert); 353 354 /** 355 * radix_tree_lookup_slot - lookup a slot in a radix tree 356 * @root: radix tree root 357 * @index: index key 358 * 359 * Returns: the slot corresponding to the position @index in the 360 * radix tree @root. This is useful for update-if-exists operations. 361 * 362 * This function cannot be called under rcu_read_lock, it must be 363 * excluded from writers, as must the returned slot for subsequent 364 * use by radix_tree_deref_slot() and radix_tree_replace slot. 365 * Caller must hold tree write locked across slot lookup and 366 * replace. 367 */ 368 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 369 { 370 unsigned int height, shift; 371 struct radix_tree_node *node, **slot; 372 373 node = root->rnode; 374 if (node == NULL) 375 return NULL; 376 377 if (!radix_tree_is_indirect_ptr(node)) { 378 if (index > 0) 379 return NULL; 380 return (void **)&root->rnode; 381 } 382 node = radix_tree_indirect_to_ptr(node); 383 384 height = node->height; 385 if (index > radix_tree_maxindex(height)) 386 return NULL; 387 388 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 389 390 do { 391 slot = (struct radix_tree_node **) 392 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 393 node = *slot; 394 if (node == NULL) 395 return NULL; 396 397 shift -= RADIX_TREE_MAP_SHIFT; 398 height--; 399 } while (height > 0); 400 401 return (void **)slot; 402 } 403 EXPORT_SYMBOL(radix_tree_lookup_slot); 404 405 /** 406 * radix_tree_lookup - perform lookup operation on a radix tree 407 * @root: radix tree root 408 * @index: index key 409 * 410 * Lookup the item at the position @index in the radix tree @root. 411 * 412 * This function can be called under rcu_read_lock, however the caller 413 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 414 * them safely). No RCU barriers are required to access or modify the 415 * returned item, however. 416 */ 417 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 418 { 419 unsigned int height, shift; 420 struct radix_tree_node *node, **slot; 421 422 node = rcu_dereference(root->rnode); 423 if (node == NULL) 424 return NULL; 425 426 if (!radix_tree_is_indirect_ptr(node)) { 427 if (index > 0) 428 return NULL; 429 return node; 430 } 431 node = radix_tree_indirect_to_ptr(node); 432 433 height = node->height; 434 if (index > radix_tree_maxindex(height)) 435 return NULL; 436 437 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 438 439 do { 440 slot = (struct radix_tree_node **) 441 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 442 node = rcu_dereference(*slot); 443 if (node == NULL) 444 return NULL; 445 446 shift -= RADIX_TREE_MAP_SHIFT; 447 height--; 448 } while (height > 0); 449 450 return node; 451 } 452 EXPORT_SYMBOL(radix_tree_lookup); 453 454 /** 455 * radix_tree_tag_set - set a tag on a radix tree node 456 * @root: radix tree root 457 * @index: index key 458 * @tag: tag index 459 * 460 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 461 * corresponding to @index in the radix tree. From 462 * the root all the way down to the leaf node. 463 * 464 * Returns the address of the tagged item. Setting a tag on a not-present 465 * item is a bug. 466 */ 467 void *radix_tree_tag_set(struct radix_tree_root *root, 468 unsigned long index, unsigned int tag) 469 { 470 unsigned int height, shift; 471 struct radix_tree_node *slot; 472 473 height = root->height; 474 BUG_ON(index > radix_tree_maxindex(height)); 475 476 slot = radix_tree_indirect_to_ptr(root->rnode); 477 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 478 479 while (height > 0) { 480 int offset; 481 482 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 483 if (!tag_get(slot, tag, offset)) 484 tag_set(slot, tag, offset); 485 slot = slot->slots[offset]; 486 BUG_ON(slot == NULL); 487 shift -= RADIX_TREE_MAP_SHIFT; 488 height--; 489 } 490 491 /* set the root's tag bit */ 492 if (slot && !root_tag_get(root, tag)) 493 root_tag_set(root, tag); 494 495 return slot; 496 } 497 EXPORT_SYMBOL(radix_tree_tag_set); 498 499 /** 500 * radix_tree_tag_clear - clear a tag on a radix tree node 501 * @root: radix tree root 502 * @index: index key 503 * @tag: tag index 504 * 505 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 506 * corresponding to @index in the radix tree. If 507 * this causes the leaf node to have no tags set then clear the tag in the 508 * next-to-leaf node, etc. 509 * 510 * Returns the address of the tagged item on success, else NULL. ie: 511 * has the same return value and semantics as radix_tree_lookup(). 512 */ 513 void *radix_tree_tag_clear(struct radix_tree_root *root, 514 unsigned long index, unsigned int tag) 515 { 516 /* 517 * The radix tree path needs to be one longer than the maximum path 518 * since the "list" is null terminated. 519 */ 520 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 521 struct radix_tree_node *slot = NULL; 522 unsigned int height, shift; 523 524 height = root->height; 525 if (index > radix_tree_maxindex(height)) 526 goto out; 527 528 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 529 pathp->node = NULL; 530 slot = radix_tree_indirect_to_ptr(root->rnode); 531 532 while (height > 0) { 533 int offset; 534 535 if (slot == NULL) 536 goto out; 537 538 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 539 pathp[1].offset = offset; 540 pathp[1].node = slot; 541 slot = slot->slots[offset]; 542 pathp++; 543 shift -= RADIX_TREE_MAP_SHIFT; 544 height--; 545 } 546 547 if (slot == NULL) 548 goto out; 549 550 while (pathp->node) { 551 if (!tag_get(pathp->node, tag, pathp->offset)) 552 goto out; 553 tag_clear(pathp->node, tag, pathp->offset); 554 if (any_tag_set(pathp->node, tag)) 555 goto out; 556 pathp--; 557 } 558 559 /* clear the root's tag bit */ 560 if (root_tag_get(root, tag)) 561 root_tag_clear(root, tag); 562 563 out: 564 return slot; 565 } 566 EXPORT_SYMBOL(radix_tree_tag_clear); 567 568 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 569 /** 570 * radix_tree_tag_get - get a tag on a radix tree node 571 * @root: radix tree root 572 * @index: index key 573 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 574 * 575 * Return values: 576 * 577 * 0: tag not present or not set 578 * 1: tag set 579 */ 580 int radix_tree_tag_get(struct radix_tree_root *root, 581 unsigned long index, unsigned int tag) 582 { 583 unsigned int height, shift; 584 struct radix_tree_node *node; 585 int saw_unset_tag = 0; 586 587 /* check the root's tag bit */ 588 if (!root_tag_get(root, tag)) 589 return 0; 590 591 node = rcu_dereference(root->rnode); 592 if (node == NULL) 593 return 0; 594 595 if (!radix_tree_is_indirect_ptr(node)) 596 return (index == 0); 597 node = radix_tree_indirect_to_ptr(node); 598 599 height = node->height; 600 if (index > radix_tree_maxindex(height)) 601 return 0; 602 603 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 604 605 for ( ; ; ) { 606 int offset; 607 608 if (node == NULL) 609 return 0; 610 611 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 612 613 /* 614 * This is just a debug check. Later, we can bale as soon as 615 * we see an unset tag. 616 */ 617 if (!tag_get(node, tag, offset)) 618 saw_unset_tag = 1; 619 if (height == 1) { 620 int ret = tag_get(node, tag, offset); 621 622 BUG_ON(ret && saw_unset_tag); 623 return !!ret; 624 } 625 node = rcu_dereference(node->slots[offset]); 626 shift -= RADIX_TREE_MAP_SHIFT; 627 height--; 628 } 629 } 630 EXPORT_SYMBOL(radix_tree_tag_get); 631 #endif 632 633 /** 634 * radix_tree_next_hole - find the next hole (not-present entry) 635 * @root: tree root 636 * @index: index key 637 * @max_scan: maximum range to search 638 * 639 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest 640 * indexed hole. 641 * 642 * Returns: the index of the hole if found, otherwise returns an index 643 * outside of the set specified (in which case 'return - index >= max_scan' 644 * will be true). 645 * 646 * radix_tree_next_hole may be called under rcu_read_lock. However, like 647 * radix_tree_gang_lookup, this will not atomically search a snapshot of the 648 * tree at a single point in time. For example, if a hole is created at index 649 * 5, then subsequently a hole is created at index 10, radix_tree_next_hole 650 * covering both indexes may return 10 if called under rcu_read_lock. 651 */ 652 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 653 unsigned long index, unsigned long max_scan) 654 { 655 unsigned long i; 656 657 for (i = 0; i < max_scan; i++) { 658 if (!radix_tree_lookup(root, index)) 659 break; 660 index++; 661 if (index == 0) 662 break; 663 } 664 665 return index; 666 } 667 EXPORT_SYMBOL(radix_tree_next_hole); 668 669 static unsigned int 670 __lookup(struct radix_tree_node *slot, void **results, unsigned long index, 671 unsigned int max_items, unsigned long *next_index) 672 { 673 unsigned int nr_found = 0; 674 unsigned int shift, height; 675 unsigned long i; 676 677 height = slot->height; 678 if (height == 0) 679 goto out; 680 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 681 682 for ( ; height > 1; height--) { 683 i = (index >> shift) & RADIX_TREE_MAP_MASK; 684 for (;;) { 685 if (slot->slots[i] != NULL) 686 break; 687 index &= ~((1UL << shift) - 1); 688 index += 1UL << shift; 689 if (index == 0) 690 goto out; /* 32-bit wraparound */ 691 i++; 692 if (i == RADIX_TREE_MAP_SIZE) 693 goto out; 694 } 695 696 shift -= RADIX_TREE_MAP_SHIFT; 697 slot = rcu_dereference(slot->slots[i]); 698 if (slot == NULL) 699 goto out; 700 } 701 702 /* Bottom level: grab some items */ 703 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 704 struct radix_tree_node *node; 705 index++; 706 node = slot->slots[i]; 707 if (node) { 708 results[nr_found++] = rcu_dereference(node); 709 if (nr_found == max_items) 710 goto out; 711 } 712 } 713 out: 714 *next_index = index; 715 return nr_found; 716 } 717 718 /** 719 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 720 * @root: radix tree root 721 * @results: where the results of the lookup are placed 722 * @first_index: start the lookup from this key 723 * @max_items: place up to this many items at *results 724 * 725 * Performs an index-ascending scan of the tree for present items. Places 726 * them at *@results and returns the number of items which were placed at 727 * *@results. 728 * 729 * The implementation is naive. 730 * 731 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 732 * rcu_read_lock. In this case, rather than the returned results being 733 * an atomic snapshot of the tree at a single point in time, the semantics 734 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 735 * have been issued in individual locks, and results stored in 'results'. 736 */ 737 unsigned int 738 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 739 unsigned long first_index, unsigned int max_items) 740 { 741 unsigned long max_index; 742 struct radix_tree_node *node; 743 unsigned long cur_index = first_index; 744 unsigned int ret; 745 746 node = rcu_dereference(root->rnode); 747 if (!node) 748 return 0; 749 750 if (!radix_tree_is_indirect_ptr(node)) { 751 if (first_index > 0) 752 return 0; 753 results[0] = node; 754 return 1; 755 } 756 node = radix_tree_indirect_to_ptr(node); 757 758 max_index = radix_tree_maxindex(node->height); 759 760 ret = 0; 761 while (ret < max_items) { 762 unsigned int nr_found; 763 unsigned long next_index; /* Index of next search */ 764 765 if (cur_index > max_index) 766 break; 767 nr_found = __lookup(node, results + ret, cur_index, 768 max_items - ret, &next_index); 769 ret += nr_found; 770 if (next_index == 0) 771 break; 772 cur_index = next_index; 773 } 774 775 return ret; 776 } 777 EXPORT_SYMBOL(radix_tree_gang_lookup); 778 779 /* 780 * FIXME: the two tag_get()s here should use find_next_bit() instead of 781 * open-coding the search. 782 */ 783 static unsigned int 784 __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, 785 unsigned int max_items, unsigned long *next_index, unsigned int tag) 786 { 787 unsigned int nr_found = 0; 788 unsigned int shift, height; 789 790 height = slot->height; 791 if (height == 0) 792 goto out; 793 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 794 795 while (height > 0) { 796 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 797 798 for (;;) { 799 if (tag_get(slot, tag, i)) 800 break; 801 index &= ~((1UL << shift) - 1); 802 index += 1UL << shift; 803 if (index == 0) 804 goto out; /* 32-bit wraparound */ 805 i++; 806 if (i == RADIX_TREE_MAP_SIZE) 807 goto out; 808 } 809 height--; 810 if (height == 0) { /* Bottom level: grab some items */ 811 unsigned long j = index & RADIX_TREE_MAP_MASK; 812 813 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 814 struct radix_tree_node *node; 815 index++; 816 if (!tag_get(slot, tag, j)) 817 continue; 818 node = slot->slots[j]; 819 /* 820 * Even though the tag was found set, we need to 821 * recheck that we have a non-NULL node, because 822 * if this lookup is lockless, it may have been 823 * subsequently deleted. 824 * 825 * Similar care must be taken in any place that 826 * lookup ->slots[x] without a lock (ie. can't 827 * rely on its value remaining the same). 828 */ 829 if (node) { 830 node = rcu_dereference(node); 831 results[nr_found++] = node; 832 if (nr_found == max_items) 833 goto out; 834 } 835 } 836 } 837 shift -= RADIX_TREE_MAP_SHIFT; 838 slot = rcu_dereference(slot->slots[i]); 839 if (slot == NULL) 840 break; 841 } 842 out: 843 *next_index = index; 844 return nr_found; 845 } 846 847 /** 848 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 849 * based on a tag 850 * @root: radix tree root 851 * @results: where the results of the lookup are placed 852 * @first_index: start the lookup from this key 853 * @max_items: place up to this many items at *results 854 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 855 * 856 * Performs an index-ascending scan of the tree for present items which 857 * have the tag indexed by @tag set. Places the items at *@results and 858 * returns the number of items which were placed at *@results. 859 */ 860 unsigned int 861 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 862 unsigned long first_index, unsigned int max_items, 863 unsigned int tag) 864 { 865 struct radix_tree_node *node; 866 unsigned long max_index; 867 unsigned long cur_index = first_index; 868 unsigned int ret; 869 870 /* check the root's tag bit */ 871 if (!root_tag_get(root, tag)) 872 return 0; 873 874 node = rcu_dereference(root->rnode); 875 if (!node) 876 return 0; 877 878 if (!radix_tree_is_indirect_ptr(node)) { 879 if (first_index > 0) 880 return 0; 881 results[0] = node; 882 return 1; 883 } 884 node = radix_tree_indirect_to_ptr(node); 885 886 max_index = radix_tree_maxindex(node->height); 887 888 ret = 0; 889 while (ret < max_items) { 890 unsigned int nr_found; 891 unsigned long next_index; /* Index of next search */ 892 893 if (cur_index > max_index) 894 break; 895 nr_found = __lookup_tag(node, results + ret, cur_index, 896 max_items - ret, &next_index, tag); 897 ret += nr_found; 898 if (next_index == 0) 899 break; 900 cur_index = next_index; 901 } 902 903 return ret; 904 } 905 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 906 907 /** 908 * radix_tree_shrink - shrink height of a radix tree to minimal 909 * @root radix tree root 910 */ 911 static inline void radix_tree_shrink(struct radix_tree_root *root) 912 { 913 /* try to shrink tree height */ 914 while (root->height > 0) { 915 struct radix_tree_node *to_free = root->rnode; 916 void *newptr; 917 918 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 919 to_free = radix_tree_indirect_to_ptr(to_free); 920 921 /* 922 * The candidate node has more than one child, or its child 923 * is not at the leftmost slot, we cannot shrink. 924 */ 925 if (to_free->count != 1) 926 break; 927 if (!to_free->slots[0]) 928 break; 929 930 /* 931 * We don't need rcu_assign_pointer(), since we are simply 932 * moving the node from one part of the tree to another. If 933 * it was safe to dereference the old pointer to it 934 * (to_free->slots[0]), it will be safe to dereference the new 935 * one (root->rnode). 936 */ 937 newptr = to_free->slots[0]; 938 if (root->height > 1) 939 newptr = radix_tree_ptr_to_indirect(newptr); 940 root->rnode = newptr; 941 root->height--; 942 radix_tree_node_free(to_free); 943 } 944 } 945 946 /** 947 * radix_tree_delete - delete an item from a radix tree 948 * @root: radix tree root 949 * @index: index key 950 * 951 * Remove the item at @index from the radix tree rooted at @root. 952 * 953 * Returns the address of the deleted item, or NULL if it was not present. 954 */ 955 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 956 { 957 /* 958 * The radix tree path needs to be one longer than the maximum path 959 * since the "list" is null terminated. 960 */ 961 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 962 struct radix_tree_node *slot = NULL; 963 struct radix_tree_node *to_free; 964 unsigned int height, shift; 965 int tag; 966 int offset; 967 968 height = root->height; 969 if (index > radix_tree_maxindex(height)) 970 goto out; 971 972 slot = root->rnode; 973 if (height == 0) { 974 root_tag_clear_all(root); 975 root->rnode = NULL; 976 goto out; 977 } 978 slot = radix_tree_indirect_to_ptr(slot); 979 980 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 981 pathp->node = NULL; 982 983 do { 984 if (slot == NULL) 985 goto out; 986 987 pathp++; 988 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 989 pathp->offset = offset; 990 pathp->node = slot; 991 slot = slot->slots[offset]; 992 shift -= RADIX_TREE_MAP_SHIFT; 993 height--; 994 } while (height > 0); 995 996 if (slot == NULL) 997 goto out; 998 999 /* 1000 * Clear all tags associated with the just-deleted item 1001 */ 1002 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 1003 if (tag_get(pathp->node, tag, pathp->offset)) 1004 radix_tree_tag_clear(root, index, tag); 1005 } 1006 1007 to_free = NULL; 1008 /* Now free the nodes we do not need anymore */ 1009 while (pathp->node) { 1010 pathp->node->slots[pathp->offset] = NULL; 1011 pathp->node->count--; 1012 /* 1013 * Queue the node for deferred freeing after the 1014 * last reference to it disappears (set NULL, above). 1015 */ 1016 if (to_free) 1017 radix_tree_node_free(to_free); 1018 1019 if (pathp->node->count) { 1020 if (pathp->node == 1021 radix_tree_indirect_to_ptr(root->rnode)) 1022 radix_tree_shrink(root); 1023 goto out; 1024 } 1025 1026 /* Node with zero slots in use so free it */ 1027 to_free = pathp->node; 1028 pathp--; 1029 1030 } 1031 root_tag_clear_all(root); 1032 root->height = 0; 1033 root->rnode = NULL; 1034 if (to_free) 1035 radix_tree_node_free(to_free); 1036 1037 out: 1038 return slot; 1039 } 1040 EXPORT_SYMBOL(radix_tree_delete); 1041 1042 /** 1043 * radix_tree_tagged - test whether any items in the tree are tagged 1044 * @root: radix tree root 1045 * @tag: tag to test 1046 */ 1047 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 1048 { 1049 return root_tag_get(root, tag); 1050 } 1051 EXPORT_SYMBOL(radix_tree_tagged); 1052 1053 static void 1054 radix_tree_node_ctor(struct kmem_cache *cachep, void *node) 1055 { 1056 memset(node, 0, sizeof(struct radix_tree_node)); 1057 } 1058 1059 static __init unsigned long __maxindex(unsigned int height) 1060 { 1061 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 1062 int shift = RADIX_TREE_INDEX_BITS - width; 1063 1064 if (shift < 0) 1065 return ~0UL; 1066 if (shift >= BITS_PER_LONG) 1067 return 0UL; 1068 return ~0UL >> shift; 1069 } 1070 1071 static __init void radix_tree_init_maxindex(void) 1072 { 1073 unsigned int i; 1074 1075 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 1076 height_to_maxindex[i] = __maxindex(i); 1077 } 1078 1079 static int radix_tree_callback(struct notifier_block *nfb, 1080 unsigned long action, 1081 void *hcpu) 1082 { 1083 int cpu = (long)hcpu; 1084 struct radix_tree_preload *rtp; 1085 1086 /* Free per-cpu pool of perloaded nodes */ 1087 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1088 rtp = &per_cpu(radix_tree_preloads, cpu); 1089 while (rtp->nr) { 1090 kmem_cache_free(radix_tree_node_cachep, 1091 rtp->nodes[rtp->nr-1]); 1092 rtp->nodes[rtp->nr-1] = NULL; 1093 rtp->nr--; 1094 } 1095 } 1096 return NOTIFY_OK; 1097 } 1098 1099 void __init radix_tree_init(void) 1100 { 1101 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1102 sizeof(struct radix_tree_node), 0, 1103 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1104 radix_tree_node_ctor); 1105 radix_tree_init_maxindex(); 1106 hotcpu_notifier(radix_tree_callback, 0); 1107 } 1108