1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 64 RADIX_TREE_MAP_SHIFT)) 65 66 /* 67 * The height_to_maxindex array needs to be one deeper than the maximum 68 * path as height 0 holds only 1 entry. 69 */ 70 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; 71 72 /* 73 * Radix tree node cache. 74 */ 75 static struct kmem_cache *radix_tree_node_cachep; 76 77 /* 78 * Per-cpu pool of preloaded nodes 79 */ 80 struct radix_tree_preload { 81 int nr; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 83 }; 84 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 85 86 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 87 { 88 return root->gfp_mask & __GFP_BITS_MASK; 89 } 90 91 /* 92 * This assumes that the caller has performed appropriate preallocation, and 93 * that the caller has pinned this thread of control to the current CPU. 94 */ 95 static struct radix_tree_node * 96 radix_tree_node_alloc(struct radix_tree_root *root) 97 { 98 struct radix_tree_node *ret = NULL; 99 gfp_t gfp_mask = root_gfp_mask(root); 100 101 if (!(gfp_mask & __GFP_WAIT)) { 102 struct radix_tree_preload *rtp; 103 104 /* 105 * Provided the caller has preloaded here, we will always 106 * succeed in getting a node here (and never reach 107 * kmem_cache_alloc) 108 */ 109 rtp = &__get_cpu_var(radix_tree_preloads); 110 if (rtp->nr) { 111 ret = rtp->nodes[rtp->nr - 1]; 112 rtp->nodes[rtp->nr - 1] = NULL; 113 rtp->nr--; 114 } 115 } 116 if (ret == NULL) 117 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 118 119 BUG_ON(radix_tree_is_indirect_ptr(ret)); 120 return ret; 121 } 122 123 static void radix_tree_node_rcu_free(struct rcu_head *head) 124 { 125 struct radix_tree_node *node = 126 container_of(head, struct radix_tree_node, rcu_head); 127 kmem_cache_free(radix_tree_node_cachep, node); 128 } 129 130 static inline void 131 radix_tree_node_free(struct radix_tree_node *node) 132 { 133 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 134 } 135 136 /* 137 * Load up this CPU's radix_tree_node buffer with sufficient objects to 138 * ensure that the addition of a single element in the tree cannot fail. On 139 * success, return zero, with preemption disabled. On error, return -ENOMEM 140 * with preemption not disabled. 141 */ 142 int radix_tree_preload(gfp_t gfp_mask) 143 { 144 struct radix_tree_preload *rtp; 145 struct radix_tree_node *node; 146 int ret = -ENOMEM; 147 148 preempt_disable(); 149 rtp = &__get_cpu_var(radix_tree_preloads); 150 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 151 preempt_enable(); 152 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 153 if (node == NULL) 154 goto out; 155 preempt_disable(); 156 rtp = &__get_cpu_var(radix_tree_preloads); 157 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 158 rtp->nodes[rtp->nr++] = node; 159 else 160 kmem_cache_free(radix_tree_node_cachep, node); 161 } 162 ret = 0; 163 out: 164 return ret; 165 } 166 EXPORT_SYMBOL(radix_tree_preload); 167 168 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 169 int offset) 170 { 171 __set_bit(offset, node->tags[tag]); 172 } 173 174 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 175 int offset) 176 { 177 __clear_bit(offset, node->tags[tag]); 178 } 179 180 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 181 int offset) 182 { 183 return test_bit(offset, node->tags[tag]); 184 } 185 186 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 187 { 188 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 189 } 190 191 192 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 193 { 194 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 195 } 196 197 static inline void root_tag_clear_all(struct radix_tree_root *root) 198 { 199 root->gfp_mask &= __GFP_BITS_MASK; 200 } 201 202 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 203 { 204 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 205 } 206 207 /* 208 * Returns 1 if any slot in the node has this tag set. 209 * Otherwise returns 0. 210 */ 211 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 212 { 213 int idx; 214 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 215 if (node->tags[tag][idx]) 216 return 1; 217 } 218 return 0; 219 } 220 221 /* 222 * Return the maximum key which can be store into a 223 * radix tree with height HEIGHT. 224 */ 225 static inline unsigned long radix_tree_maxindex(unsigned int height) 226 { 227 return height_to_maxindex[height]; 228 } 229 230 /* 231 * Extend a radix tree so it can store key @index. 232 */ 233 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 234 { 235 struct radix_tree_node *node; 236 unsigned int height; 237 int tag; 238 239 /* Figure out what the height should be. */ 240 height = root->height + 1; 241 while (index > radix_tree_maxindex(height)) 242 height++; 243 244 if (root->rnode == NULL) { 245 root->height = height; 246 goto out; 247 } 248 249 do { 250 unsigned int newheight; 251 if (!(node = radix_tree_node_alloc(root))) 252 return -ENOMEM; 253 254 /* Increase the height. */ 255 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 256 257 /* Propagate the aggregated tag info into the new root */ 258 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 259 if (root_tag_get(root, tag)) 260 tag_set(node, tag, 0); 261 } 262 263 newheight = root->height+1; 264 node->height = newheight; 265 node->count = 1; 266 node = radix_tree_ptr_to_indirect(node); 267 rcu_assign_pointer(root->rnode, node); 268 root->height = newheight; 269 } while (height > root->height); 270 out: 271 return 0; 272 } 273 274 /** 275 * radix_tree_insert - insert into a radix tree 276 * @root: radix tree root 277 * @index: index key 278 * @item: item to insert 279 * 280 * Insert an item into the radix tree at position @index. 281 */ 282 int radix_tree_insert(struct radix_tree_root *root, 283 unsigned long index, void *item) 284 { 285 struct radix_tree_node *node = NULL, *slot; 286 unsigned int height, shift; 287 int offset; 288 int error; 289 290 BUG_ON(radix_tree_is_indirect_ptr(item)); 291 292 /* Make sure the tree is high enough. */ 293 if (index > radix_tree_maxindex(root->height)) { 294 error = radix_tree_extend(root, index); 295 if (error) 296 return error; 297 } 298 299 slot = radix_tree_indirect_to_ptr(root->rnode); 300 301 height = root->height; 302 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 303 304 offset = 0; /* uninitialised var warning */ 305 while (height > 0) { 306 if (slot == NULL) { 307 /* Have to add a child node. */ 308 if (!(slot = radix_tree_node_alloc(root))) 309 return -ENOMEM; 310 slot->height = height; 311 if (node) { 312 rcu_assign_pointer(node->slots[offset], slot); 313 node->count++; 314 } else 315 rcu_assign_pointer(root->rnode, 316 radix_tree_ptr_to_indirect(slot)); 317 } 318 319 /* Go a level down */ 320 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 321 node = slot; 322 slot = node->slots[offset]; 323 shift -= RADIX_TREE_MAP_SHIFT; 324 height--; 325 } 326 327 if (slot != NULL) 328 return -EEXIST; 329 330 if (node) { 331 node->count++; 332 rcu_assign_pointer(node->slots[offset], item); 333 BUG_ON(tag_get(node, 0, offset)); 334 BUG_ON(tag_get(node, 1, offset)); 335 } else { 336 rcu_assign_pointer(root->rnode, item); 337 BUG_ON(root_tag_get(root, 0)); 338 BUG_ON(root_tag_get(root, 1)); 339 } 340 341 return 0; 342 } 343 EXPORT_SYMBOL(radix_tree_insert); 344 345 /** 346 * radix_tree_lookup_slot - lookup a slot in a radix tree 347 * @root: radix tree root 348 * @index: index key 349 * 350 * Returns: the slot corresponding to the position @index in the 351 * radix tree @root. This is useful for update-if-exists operations. 352 * 353 * This function cannot be called under rcu_read_lock, it must be 354 * excluded from writers, as must the returned slot for subsequent 355 * use by radix_tree_deref_slot() and radix_tree_replace slot. 356 * Caller must hold tree write locked across slot lookup and 357 * replace. 358 */ 359 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 360 { 361 unsigned int height, shift; 362 struct radix_tree_node *node, **slot; 363 364 node = root->rnode; 365 if (node == NULL) 366 return NULL; 367 368 if (!radix_tree_is_indirect_ptr(node)) { 369 if (index > 0) 370 return NULL; 371 return (void **)&root->rnode; 372 } 373 node = radix_tree_indirect_to_ptr(node); 374 375 height = node->height; 376 if (index > radix_tree_maxindex(height)) 377 return NULL; 378 379 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 380 381 do { 382 slot = (struct radix_tree_node **) 383 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 384 node = *slot; 385 if (node == NULL) 386 return NULL; 387 388 shift -= RADIX_TREE_MAP_SHIFT; 389 height--; 390 } while (height > 0); 391 392 return (void **)slot; 393 } 394 EXPORT_SYMBOL(radix_tree_lookup_slot); 395 396 /** 397 * radix_tree_lookup - perform lookup operation on a radix tree 398 * @root: radix tree root 399 * @index: index key 400 * 401 * Lookup the item at the position @index in the radix tree @root. 402 * 403 * This function can be called under rcu_read_lock, however the caller 404 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 405 * them safely). No RCU barriers are required to access or modify the 406 * returned item, however. 407 */ 408 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 409 { 410 unsigned int height, shift; 411 struct radix_tree_node *node, **slot; 412 413 node = rcu_dereference(root->rnode); 414 if (node == NULL) 415 return NULL; 416 417 if (!radix_tree_is_indirect_ptr(node)) { 418 if (index > 0) 419 return NULL; 420 return node; 421 } 422 node = radix_tree_indirect_to_ptr(node); 423 424 height = node->height; 425 if (index > radix_tree_maxindex(height)) 426 return NULL; 427 428 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 429 430 do { 431 slot = (struct radix_tree_node **) 432 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 433 node = rcu_dereference(*slot); 434 if (node == NULL) 435 return NULL; 436 437 shift -= RADIX_TREE_MAP_SHIFT; 438 height--; 439 } while (height > 0); 440 441 return node; 442 } 443 EXPORT_SYMBOL(radix_tree_lookup); 444 445 /** 446 * radix_tree_tag_set - set a tag on a radix tree node 447 * @root: radix tree root 448 * @index: index key 449 * @tag: tag index 450 * 451 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 452 * corresponding to @index in the radix tree. From 453 * the root all the way down to the leaf node. 454 * 455 * Returns the address of the tagged item. Setting a tag on a not-present 456 * item is a bug. 457 */ 458 void *radix_tree_tag_set(struct radix_tree_root *root, 459 unsigned long index, unsigned int tag) 460 { 461 unsigned int height, shift; 462 struct radix_tree_node *slot; 463 464 height = root->height; 465 BUG_ON(index > radix_tree_maxindex(height)); 466 467 slot = radix_tree_indirect_to_ptr(root->rnode); 468 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 469 470 while (height > 0) { 471 int offset; 472 473 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 474 if (!tag_get(slot, tag, offset)) 475 tag_set(slot, tag, offset); 476 slot = slot->slots[offset]; 477 BUG_ON(slot == NULL); 478 shift -= RADIX_TREE_MAP_SHIFT; 479 height--; 480 } 481 482 /* set the root's tag bit */ 483 if (slot && !root_tag_get(root, tag)) 484 root_tag_set(root, tag); 485 486 return slot; 487 } 488 EXPORT_SYMBOL(radix_tree_tag_set); 489 490 /** 491 * radix_tree_tag_clear - clear a tag on a radix tree node 492 * @root: radix tree root 493 * @index: index key 494 * @tag: tag index 495 * 496 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 497 * corresponding to @index in the radix tree. If 498 * this causes the leaf node to have no tags set then clear the tag in the 499 * next-to-leaf node, etc. 500 * 501 * Returns the address of the tagged item on success, else NULL. ie: 502 * has the same return value and semantics as radix_tree_lookup(). 503 */ 504 void *radix_tree_tag_clear(struct radix_tree_root *root, 505 unsigned long index, unsigned int tag) 506 { 507 /* 508 * The radix tree path needs to be one longer than the maximum path 509 * since the "list" is null terminated. 510 */ 511 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 512 struct radix_tree_node *slot = NULL; 513 unsigned int height, shift; 514 515 height = root->height; 516 if (index > radix_tree_maxindex(height)) 517 goto out; 518 519 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 520 pathp->node = NULL; 521 slot = radix_tree_indirect_to_ptr(root->rnode); 522 523 while (height > 0) { 524 int offset; 525 526 if (slot == NULL) 527 goto out; 528 529 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 530 pathp[1].offset = offset; 531 pathp[1].node = slot; 532 slot = slot->slots[offset]; 533 pathp++; 534 shift -= RADIX_TREE_MAP_SHIFT; 535 height--; 536 } 537 538 if (slot == NULL) 539 goto out; 540 541 while (pathp->node) { 542 if (!tag_get(pathp->node, tag, pathp->offset)) 543 goto out; 544 tag_clear(pathp->node, tag, pathp->offset); 545 if (any_tag_set(pathp->node, tag)) 546 goto out; 547 pathp--; 548 } 549 550 /* clear the root's tag bit */ 551 if (root_tag_get(root, tag)) 552 root_tag_clear(root, tag); 553 554 out: 555 return slot; 556 } 557 EXPORT_SYMBOL(radix_tree_tag_clear); 558 559 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 560 /** 561 * radix_tree_tag_get - get a tag on a radix tree node 562 * @root: radix tree root 563 * @index: index key 564 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 565 * 566 * Return values: 567 * 568 * 0: tag not present or not set 569 * 1: tag set 570 */ 571 int radix_tree_tag_get(struct radix_tree_root *root, 572 unsigned long index, unsigned int tag) 573 { 574 unsigned int height, shift; 575 struct radix_tree_node *node; 576 int saw_unset_tag = 0; 577 578 /* check the root's tag bit */ 579 if (!root_tag_get(root, tag)) 580 return 0; 581 582 node = rcu_dereference(root->rnode); 583 if (node == NULL) 584 return 0; 585 586 if (!radix_tree_is_indirect_ptr(node)) 587 return (index == 0); 588 node = radix_tree_indirect_to_ptr(node); 589 590 height = node->height; 591 if (index > radix_tree_maxindex(height)) 592 return 0; 593 594 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 595 596 for ( ; ; ) { 597 int offset; 598 599 if (node == NULL) 600 return 0; 601 602 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 603 604 /* 605 * This is just a debug check. Later, we can bale as soon as 606 * we see an unset tag. 607 */ 608 if (!tag_get(node, tag, offset)) 609 saw_unset_tag = 1; 610 if (height == 1) { 611 int ret = tag_get(node, tag, offset); 612 613 BUG_ON(ret && saw_unset_tag); 614 return !!ret; 615 } 616 node = rcu_dereference(node->slots[offset]); 617 shift -= RADIX_TREE_MAP_SHIFT; 618 height--; 619 } 620 } 621 EXPORT_SYMBOL(radix_tree_tag_get); 622 #endif 623 624 /** 625 * radix_tree_next_hole - find the next hole (not-present entry) 626 * @root: tree root 627 * @index: index key 628 * @max_scan: maximum range to search 629 * 630 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest 631 * indexed hole. 632 * 633 * Returns: the index of the hole if found, otherwise returns an index 634 * outside of the set specified (in which case 'return - index >= max_scan' 635 * will be true). 636 * 637 * radix_tree_next_hole may be called under rcu_read_lock. However, like 638 * radix_tree_gang_lookup, this will not atomically search a snapshot of the 639 * tree at a single point in time. For example, if a hole is created at index 640 * 5, then subsequently a hole is created at index 10, radix_tree_next_hole 641 * covering both indexes may return 10 if called under rcu_read_lock. 642 */ 643 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 644 unsigned long index, unsigned long max_scan) 645 { 646 unsigned long i; 647 648 for (i = 0; i < max_scan; i++) { 649 if (!radix_tree_lookup(root, index)) 650 break; 651 index++; 652 if (index == 0) 653 break; 654 } 655 656 return index; 657 } 658 EXPORT_SYMBOL(radix_tree_next_hole); 659 660 static unsigned int 661 __lookup(struct radix_tree_node *slot, void **results, unsigned long index, 662 unsigned int max_items, unsigned long *next_index) 663 { 664 unsigned int nr_found = 0; 665 unsigned int shift, height; 666 unsigned long i; 667 668 height = slot->height; 669 if (height == 0) 670 goto out; 671 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 672 673 for ( ; height > 1; height--) { 674 i = (index >> shift) & RADIX_TREE_MAP_MASK; 675 for (;;) { 676 if (slot->slots[i] != NULL) 677 break; 678 index &= ~((1UL << shift) - 1); 679 index += 1UL << shift; 680 if (index == 0) 681 goto out; /* 32-bit wraparound */ 682 i++; 683 if (i == RADIX_TREE_MAP_SIZE) 684 goto out; 685 } 686 687 shift -= RADIX_TREE_MAP_SHIFT; 688 slot = rcu_dereference(slot->slots[i]); 689 if (slot == NULL) 690 goto out; 691 } 692 693 /* Bottom level: grab some items */ 694 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 695 struct radix_tree_node *node; 696 index++; 697 node = slot->slots[i]; 698 if (node) { 699 results[nr_found++] = rcu_dereference(node); 700 if (nr_found == max_items) 701 goto out; 702 } 703 } 704 out: 705 *next_index = index; 706 return nr_found; 707 } 708 709 /** 710 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 711 * @root: radix tree root 712 * @results: where the results of the lookup are placed 713 * @first_index: start the lookup from this key 714 * @max_items: place up to this many items at *results 715 * 716 * Performs an index-ascending scan of the tree for present items. Places 717 * them at *@results and returns the number of items which were placed at 718 * *@results. 719 * 720 * The implementation is naive. 721 * 722 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 723 * rcu_read_lock. In this case, rather than the returned results being 724 * an atomic snapshot of the tree at a single point in time, the semantics 725 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 726 * have been issued in individual locks, and results stored in 'results'. 727 */ 728 unsigned int 729 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 730 unsigned long first_index, unsigned int max_items) 731 { 732 unsigned long max_index; 733 struct radix_tree_node *node; 734 unsigned long cur_index = first_index; 735 unsigned int ret; 736 737 node = rcu_dereference(root->rnode); 738 if (!node) 739 return 0; 740 741 if (!radix_tree_is_indirect_ptr(node)) { 742 if (first_index > 0) 743 return 0; 744 results[0] = node; 745 return 1; 746 } 747 node = radix_tree_indirect_to_ptr(node); 748 749 max_index = radix_tree_maxindex(node->height); 750 751 ret = 0; 752 while (ret < max_items) { 753 unsigned int nr_found; 754 unsigned long next_index; /* Index of next search */ 755 756 if (cur_index > max_index) 757 break; 758 nr_found = __lookup(node, results + ret, cur_index, 759 max_items - ret, &next_index); 760 ret += nr_found; 761 if (next_index == 0) 762 break; 763 cur_index = next_index; 764 } 765 766 return ret; 767 } 768 EXPORT_SYMBOL(radix_tree_gang_lookup); 769 770 /* 771 * FIXME: the two tag_get()s here should use find_next_bit() instead of 772 * open-coding the search. 773 */ 774 static unsigned int 775 __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, 776 unsigned int max_items, unsigned long *next_index, unsigned int tag) 777 { 778 unsigned int nr_found = 0; 779 unsigned int shift, height; 780 781 height = slot->height; 782 if (height == 0) 783 goto out; 784 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 785 786 while (height > 0) { 787 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 788 789 for (;;) { 790 if (tag_get(slot, tag, i)) 791 break; 792 index &= ~((1UL << shift) - 1); 793 index += 1UL << shift; 794 if (index == 0) 795 goto out; /* 32-bit wraparound */ 796 i++; 797 if (i == RADIX_TREE_MAP_SIZE) 798 goto out; 799 } 800 height--; 801 if (height == 0) { /* Bottom level: grab some items */ 802 unsigned long j = index & RADIX_TREE_MAP_MASK; 803 804 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 805 struct radix_tree_node *node; 806 index++; 807 if (!tag_get(slot, tag, j)) 808 continue; 809 node = slot->slots[j]; 810 /* 811 * Even though the tag was found set, we need to 812 * recheck that we have a non-NULL node, because 813 * if this lookup is lockless, it may have been 814 * subsequently deleted. 815 * 816 * Similar care must be taken in any place that 817 * lookup ->slots[x] without a lock (ie. can't 818 * rely on its value remaining the same). 819 */ 820 if (node) { 821 node = rcu_dereference(node); 822 results[nr_found++] = node; 823 if (nr_found == max_items) 824 goto out; 825 } 826 } 827 } 828 shift -= RADIX_TREE_MAP_SHIFT; 829 slot = rcu_dereference(slot->slots[i]); 830 if (slot == NULL) 831 break; 832 } 833 out: 834 *next_index = index; 835 return nr_found; 836 } 837 838 /** 839 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 840 * based on a tag 841 * @root: radix tree root 842 * @results: where the results of the lookup are placed 843 * @first_index: start the lookup from this key 844 * @max_items: place up to this many items at *results 845 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 846 * 847 * Performs an index-ascending scan of the tree for present items which 848 * have the tag indexed by @tag set. Places the items at *@results and 849 * returns the number of items which were placed at *@results. 850 */ 851 unsigned int 852 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 853 unsigned long first_index, unsigned int max_items, 854 unsigned int tag) 855 { 856 struct radix_tree_node *node; 857 unsigned long max_index; 858 unsigned long cur_index = first_index; 859 unsigned int ret; 860 861 /* check the root's tag bit */ 862 if (!root_tag_get(root, tag)) 863 return 0; 864 865 node = rcu_dereference(root->rnode); 866 if (!node) 867 return 0; 868 869 if (!radix_tree_is_indirect_ptr(node)) { 870 if (first_index > 0) 871 return 0; 872 results[0] = node; 873 return 1; 874 } 875 node = radix_tree_indirect_to_ptr(node); 876 877 max_index = radix_tree_maxindex(node->height); 878 879 ret = 0; 880 while (ret < max_items) { 881 unsigned int nr_found; 882 unsigned long next_index; /* Index of next search */ 883 884 if (cur_index > max_index) 885 break; 886 nr_found = __lookup_tag(node, results + ret, cur_index, 887 max_items - ret, &next_index, tag); 888 ret += nr_found; 889 if (next_index == 0) 890 break; 891 cur_index = next_index; 892 } 893 894 return ret; 895 } 896 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 897 898 /** 899 * radix_tree_shrink - shrink height of a radix tree to minimal 900 * @root radix tree root 901 */ 902 static inline void radix_tree_shrink(struct radix_tree_root *root) 903 { 904 /* try to shrink tree height */ 905 while (root->height > 0) { 906 struct radix_tree_node *to_free = root->rnode; 907 void *newptr; 908 909 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 910 to_free = radix_tree_indirect_to_ptr(to_free); 911 912 /* 913 * The candidate node has more than one child, or its child 914 * is not at the leftmost slot, we cannot shrink. 915 */ 916 if (to_free->count != 1) 917 break; 918 if (!to_free->slots[0]) 919 break; 920 921 /* 922 * We don't need rcu_assign_pointer(), since we are simply 923 * moving the node from one part of the tree to another. If 924 * it was safe to dereference the old pointer to it 925 * (to_free->slots[0]), it will be safe to dereference the new 926 * one (root->rnode). 927 */ 928 newptr = to_free->slots[0]; 929 if (root->height > 1) 930 newptr = radix_tree_ptr_to_indirect(newptr); 931 root->rnode = newptr; 932 root->height--; 933 /* must only free zeroed nodes into the slab */ 934 tag_clear(to_free, 0, 0); 935 tag_clear(to_free, 1, 0); 936 to_free->slots[0] = NULL; 937 to_free->count = 0; 938 radix_tree_node_free(to_free); 939 } 940 } 941 942 /** 943 * radix_tree_delete - delete an item from a radix tree 944 * @root: radix tree root 945 * @index: index key 946 * 947 * Remove the item at @index from the radix tree rooted at @root. 948 * 949 * Returns the address of the deleted item, or NULL if it was not present. 950 */ 951 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 952 { 953 /* 954 * The radix tree path needs to be one longer than the maximum path 955 * since the "list" is null terminated. 956 */ 957 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 958 struct radix_tree_node *slot = NULL; 959 struct radix_tree_node *to_free; 960 unsigned int height, shift; 961 int tag; 962 int offset; 963 964 height = root->height; 965 if (index > radix_tree_maxindex(height)) 966 goto out; 967 968 slot = root->rnode; 969 if (height == 0) { 970 root_tag_clear_all(root); 971 root->rnode = NULL; 972 goto out; 973 } 974 slot = radix_tree_indirect_to_ptr(slot); 975 976 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 977 pathp->node = NULL; 978 979 do { 980 if (slot == NULL) 981 goto out; 982 983 pathp++; 984 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 985 pathp->offset = offset; 986 pathp->node = slot; 987 slot = slot->slots[offset]; 988 shift -= RADIX_TREE_MAP_SHIFT; 989 height--; 990 } while (height > 0); 991 992 if (slot == NULL) 993 goto out; 994 995 /* 996 * Clear all tags associated with the just-deleted item 997 */ 998 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 999 if (tag_get(pathp->node, tag, pathp->offset)) 1000 radix_tree_tag_clear(root, index, tag); 1001 } 1002 1003 to_free = NULL; 1004 /* Now free the nodes we do not need anymore */ 1005 while (pathp->node) { 1006 pathp->node->slots[pathp->offset] = NULL; 1007 pathp->node->count--; 1008 /* 1009 * Queue the node for deferred freeing after the 1010 * last reference to it disappears (set NULL, above). 1011 */ 1012 if (to_free) 1013 radix_tree_node_free(to_free); 1014 1015 if (pathp->node->count) { 1016 if (pathp->node == 1017 radix_tree_indirect_to_ptr(root->rnode)) 1018 radix_tree_shrink(root); 1019 goto out; 1020 } 1021 1022 /* Node with zero slots in use so free it */ 1023 to_free = pathp->node; 1024 pathp--; 1025 1026 } 1027 root_tag_clear_all(root); 1028 root->height = 0; 1029 root->rnode = NULL; 1030 if (to_free) 1031 radix_tree_node_free(to_free); 1032 1033 out: 1034 return slot; 1035 } 1036 EXPORT_SYMBOL(radix_tree_delete); 1037 1038 /** 1039 * radix_tree_tagged - test whether any items in the tree are tagged 1040 * @root: radix tree root 1041 * @tag: tag to test 1042 */ 1043 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 1044 { 1045 return root_tag_get(root, tag); 1046 } 1047 EXPORT_SYMBOL(radix_tree_tagged); 1048 1049 static void 1050 radix_tree_node_ctor(struct kmem_cache *cachep, void *node) 1051 { 1052 memset(node, 0, sizeof(struct radix_tree_node)); 1053 } 1054 1055 static __init unsigned long __maxindex(unsigned int height) 1056 { 1057 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 1058 int shift = RADIX_TREE_INDEX_BITS - width; 1059 1060 if (shift < 0) 1061 return ~0UL; 1062 if (shift >= BITS_PER_LONG) 1063 return 0UL; 1064 return ~0UL >> shift; 1065 } 1066 1067 static __init void radix_tree_init_maxindex(void) 1068 { 1069 unsigned int i; 1070 1071 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 1072 height_to_maxindex[i] = __maxindex(i); 1073 } 1074 1075 static int radix_tree_callback(struct notifier_block *nfb, 1076 unsigned long action, 1077 void *hcpu) 1078 { 1079 int cpu = (long)hcpu; 1080 struct radix_tree_preload *rtp; 1081 1082 /* Free per-cpu pool of perloaded nodes */ 1083 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1084 rtp = &per_cpu(radix_tree_preloads, cpu); 1085 while (rtp->nr) { 1086 kmem_cache_free(radix_tree_node_cachep, 1087 rtp->nodes[rtp->nr-1]); 1088 rtp->nodes[rtp->nr-1] = NULL; 1089 rtp->nr--; 1090 } 1091 } 1092 return NOTIFY_OK; 1093 } 1094 1095 void __init radix_tree_init(void) 1096 { 1097 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1098 sizeof(struct radix_tree_node), 0, 1099 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1100 radix_tree_node_ctor); 1101 radix_tree_init_maxindex(); 1102 hotcpu_notifier(radix_tree_callback, 0); 1103 } 1104