1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 64 RADIX_TREE_MAP_SHIFT)) 65 66 /* 67 * The height_to_maxindex array needs to be one deeper than the maximum 68 * path as height 0 holds only 1 entry. 69 */ 70 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; 71 72 /* 73 * Radix tree node cache. 74 */ 75 static struct kmem_cache *radix_tree_node_cachep; 76 77 /* 78 * Per-cpu pool of preloaded nodes 79 */ 80 struct radix_tree_preload { 81 int nr; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 83 }; 84 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 85 86 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 87 { 88 return root->gfp_mask & __GFP_BITS_MASK; 89 } 90 91 /* 92 * This assumes that the caller has performed appropriate preallocation, and 93 * that the caller has pinned this thread of control to the current CPU. 94 */ 95 static struct radix_tree_node * 96 radix_tree_node_alloc(struct radix_tree_root *root) 97 { 98 struct radix_tree_node *ret; 99 gfp_t gfp_mask = root_gfp_mask(root); 100 101 ret = kmem_cache_alloc(radix_tree_node_cachep, 102 set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); 103 if (ret == NULL && !(gfp_mask & __GFP_WAIT)) { 104 struct radix_tree_preload *rtp; 105 106 rtp = &__get_cpu_var(radix_tree_preloads); 107 if (rtp->nr) { 108 ret = rtp->nodes[rtp->nr - 1]; 109 rtp->nodes[rtp->nr - 1] = NULL; 110 rtp->nr--; 111 } 112 } 113 BUG_ON(radix_tree_is_indirect_ptr(ret)); 114 return ret; 115 } 116 117 static void radix_tree_node_rcu_free(struct rcu_head *head) 118 { 119 struct radix_tree_node *node = 120 container_of(head, struct radix_tree_node, rcu_head); 121 kmem_cache_free(radix_tree_node_cachep, node); 122 } 123 124 static inline void 125 radix_tree_node_free(struct radix_tree_node *node) 126 { 127 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 128 } 129 130 /* 131 * Load up this CPU's radix_tree_node buffer with sufficient objects to 132 * ensure that the addition of a single element in the tree cannot fail. On 133 * success, return zero, with preemption disabled. On error, return -ENOMEM 134 * with preemption not disabled. 135 */ 136 int radix_tree_preload(gfp_t gfp_mask) 137 { 138 struct radix_tree_preload *rtp; 139 struct radix_tree_node *node; 140 int ret = -ENOMEM; 141 142 preempt_disable(); 143 rtp = &__get_cpu_var(radix_tree_preloads); 144 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 145 preempt_enable(); 146 node = kmem_cache_alloc(radix_tree_node_cachep, 147 set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); 148 if (node == NULL) 149 goto out; 150 preempt_disable(); 151 rtp = &__get_cpu_var(radix_tree_preloads); 152 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 153 rtp->nodes[rtp->nr++] = node; 154 else 155 kmem_cache_free(radix_tree_node_cachep, node); 156 } 157 ret = 0; 158 out: 159 return ret; 160 } 161 EXPORT_SYMBOL(radix_tree_preload); 162 163 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 164 int offset) 165 { 166 __set_bit(offset, node->tags[tag]); 167 } 168 169 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 170 int offset) 171 { 172 __clear_bit(offset, node->tags[tag]); 173 } 174 175 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 176 int offset) 177 { 178 return test_bit(offset, node->tags[tag]); 179 } 180 181 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 182 { 183 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 184 } 185 186 187 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 188 { 189 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 190 } 191 192 static inline void root_tag_clear_all(struct radix_tree_root *root) 193 { 194 root->gfp_mask &= __GFP_BITS_MASK; 195 } 196 197 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 198 { 199 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 200 } 201 202 /* 203 * Returns 1 if any slot in the node has this tag set. 204 * Otherwise returns 0. 205 */ 206 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 207 { 208 int idx; 209 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 210 if (node->tags[tag][idx]) 211 return 1; 212 } 213 return 0; 214 } 215 216 /* 217 * Return the maximum key which can be store into a 218 * radix tree with height HEIGHT. 219 */ 220 static inline unsigned long radix_tree_maxindex(unsigned int height) 221 { 222 return height_to_maxindex[height]; 223 } 224 225 /* 226 * Extend a radix tree so it can store key @index. 227 */ 228 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 229 { 230 struct radix_tree_node *node; 231 unsigned int height; 232 int tag; 233 234 /* Figure out what the height should be. */ 235 height = root->height + 1; 236 while (index > radix_tree_maxindex(height)) 237 height++; 238 239 if (root->rnode == NULL) { 240 root->height = height; 241 goto out; 242 } 243 244 do { 245 unsigned int newheight; 246 if (!(node = radix_tree_node_alloc(root))) 247 return -ENOMEM; 248 249 /* Increase the height. */ 250 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 251 252 /* Propagate the aggregated tag info into the new root */ 253 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 254 if (root_tag_get(root, tag)) 255 tag_set(node, tag, 0); 256 } 257 258 newheight = root->height+1; 259 node->height = newheight; 260 node->count = 1; 261 node = radix_tree_ptr_to_indirect(node); 262 rcu_assign_pointer(root->rnode, node); 263 root->height = newheight; 264 } while (height > root->height); 265 out: 266 return 0; 267 } 268 269 /** 270 * radix_tree_insert - insert into a radix tree 271 * @root: radix tree root 272 * @index: index key 273 * @item: item to insert 274 * 275 * Insert an item into the radix tree at position @index. 276 */ 277 int radix_tree_insert(struct radix_tree_root *root, 278 unsigned long index, void *item) 279 { 280 struct radix_tree_node *node = NULL, *slot; 281 unsigned int height, shift; 282 int offset; 283 int error; 284 285 BUG_ON(radix_tree_is_indirect_ptr(item)); 286 287 /* Make sure the tree is high enough. */ 288 if (index > radix_tree_maxindex(root->height)) { 289 error = radix_tree_extend(root, index); 290 if (error) 291 return error; 292 } 293 294 slot = radix_tree_indirect_to_ptr(root->rnode); 295 296 height = root->height; 297 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 298 299 offset = 0; /* uninitialised var warning */ 300 while (height > 0) { 301 if (slot == NULL) { 302 /* Have to add a child node. */ 303 if (!(slot = radix_tree_node_alloc(root))) 304 return -ENOMEM; 305 slot->height = height; 306 if (node) { 307 rcu_assign_pointer(node->slots[offset], slot); 308 node->count++; 309 } else 310 rcu_assign_pointer(root->rnode, 311 radix_tree_ptr_to_indirect(slot)); 312 } 313 314 /* Go a level down */ 315 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 316 node = slot; 317 slot = node->slots[offset]; 318 shift -= RADIX_TREE_MAP_SHIFT; 319 height--; 320 } 321 322 if (slot != NULL) 323 return -EEXIST; 324 325 if (node) { 326 node->count++; 327 rcu_assign_pointer(node->slots[offset], item); 328 BUG_ON(tag_get(node, 0, offset)); 329 BUG_ON(tag_get(node, 1, offset)); 330 } else { 331 rcu_assign_pointer(root->rnode, item); 332 BUG_ON(root_tag_get(root, 0)); 333 BUG_ON(root_tag_get(root, 1)); 334 } 335 336 return 0; 337 } 338 EXPORT_SYMBOL(radix_tree_insert); 339 340 /** 341 * radix_tree_lookup_slot - lookup a slot in a radix tree 342 * @root: radix tree root 343 * @index: index key 344 * 345 * Returns: the slot corresponding to the position @index in the 346 * radix tree @root. This is useful for update-if-exists operations. 347 * 348 * This function cannot be called under rcu_read_lock, it must be 349 * excluded from writers, as must the returned slot for subsequent 350 * use by radix_tree_deref_slot() and radix_tree_replace slot. 351 * Caller must hold tree write locked across slot lookup and 352 * replace. 353 */ 354 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 355 { 356 unsigned int height, shift; 357 struct radix_tree_node *node, **slot; 358 359 node = root->rnode; 360 if (node == NULL) 361 return NULL; 362 363 if (!radix_tree_is_indirect_ptr(node)) { 364 if (index > 0) 365 return NULL; 366 return (void **)&root->rnode; 367 } 368 node = radix_tree_indirect_to_ptr(node); 369 370 height = node->height; 371 if (index > radix_tree_maxindex(height)) 372 return NULL; 373 374 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 375 376 do { 377 slot = (struct radix_tree_node **) 378 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 379 node = *slot; 380 if (node == NULL) 381 return NULL; 382 383 shift -= RADIX_TREE_MAP_SHIFT; 384 height--; 385 } while (height > 0); 386 387 return (void **)slot; 388 } 389 EXPORT_SYMBOL(radix_tree_lookup_slot); 390 391 /** 392 * radix_tree_lookup - perform lookup operation on a radix tree 393 * @root: radix tree root 394 * @index: index key 395 * 396 * Lookup the item at the position @index in the radix tree @root. 397 * 398 * This function can be called under rcu_read_lock, however the caller 399 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 400 * them safely). No RCU barriers are required to access or modify the 401 * returned item, however. 402 */ 403 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 404 { 405 unsigned int height, shift; 406 struct radix_tree_node *node, **slot; 407 408 node = rcu_dereference(root->rnode); 409 if (node == NULL) 410 return NULL; 411 412 if (!radix_tree_is_indirect_ptr(node)) { 413 if (index > 0) 414 return NULL; 415 return node; 416 } 417 node = radix_tree_indirect_to_ptr(node); 418 419 height = node->height; 420 if (index > radix_tree_maxindex(height)) 421 return NULL; 422 423 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 424 425 do { 426 slot = (struct radix_tree_node **) 427 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 428 node = rcu_dereference(*slot); 429 if (node == NULL) 430 return NULL; 431 432 shift -= RADIX_TREE_MAP_SHIFT; 433 height--; 434 } while (height > 0); 435 436 return node; 437 } 438 EXPORT_SYMBOL(radix_tree_lookup); 439 440 /** 441 * radix_tree_tag_set - set a tag on a radix tree node 442 * @root: radix tree root 443 * @index: index key 444 * @tag: tag index 445 * 446 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 447 * corresponding to @index in the radix tree. From 448 * the root all the way down to the leaf node. 449 * 450 * Returns the address of the tagged item. Setting a tag on a not-present 451 * item is a bug. 452 */ 453 void *radix_tree_tag_set(struct radix_tree_root *root, 454 unsigned long index, unsigned int tag) 455 { 456 unsigned int height, shift; 457 struct radix_tree_node *slot; 458 459 height = root->height; 460 BUG_ON(index > radix_tree_maxindex(height)); 461 462 slot = radix_tree_indirect_to_ptr(root->rnode); 463 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 464 465 while (height > 0) { 466 int offset; 467 468 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 469 if (!tag_get(slot, tag, offset)) 470 tag_set(slot, tag, offset); 471 slot = slot->slots[offset]; 472 BUG_ON(slot == NULL); 473 shift -= RADIX_TREE_MAP_SHIFT; 474 height--; 475 } 476 477 /* set the root's tag bit */ 478 if (slot && !root_tag_get(root, tag)) 479 root_tag_set(root, tag); 480 481 return slot; 482 } 483 EXPORT_SYMBOL(radix_tree_tag_set); 484 485 /** 486 * radix_tree_tag_clear - clear a tag on a radix tree node 487 * @root: radix tree root 488 * @index: index key 489 * @tag: tag index 490 * 491 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 492 * corresponding to @index in the radix tree. If 493 * this causes the leaf node to have no tags set then clear the tag in the 494 * next-to-leaf node, etc. 495 * 496 * Returns the address of the tagged item on success, else NULL. ie: 497 * has the same return value and semantics as radix_tree_lookup(). 498 */ 499 void *radix_tree_tag_clear(struct radix_tree_root *root, 500 unsigned long index, unsigned int tag) 501 { 502 /* 503 * The radix tree path needs to be one longer than the maximum path 504 * since the "list" is null terminated. 505 */ 506 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 507 struct radix_tree_node *slot = NULL; 508 unsigned int height, shift; 509 510 height = root->height; 511 if (index > radix_tree_maxindex(height)) 512 goto out; 513 514 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 515 pathp->node = NULL; 516 slot = radix_tree_indirect_to_ptr(root->rnode); 517 518 while (height > 0) { 519 int offset; 520 521 if (slot == NULL) 522 goto out; 523 524 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 525 pathp[1].offset = offset; 526 pathp[1].node = slot; 527 slot = slot->slots[offset]; 528 pathp++; 529 shift -= RADIX_TREE_MAP_SHIFT; 530 height--; 531 } 532 533 if (slot == NULL) 534 goto out; 535 536 while (pathp->node) { 537 if (!tag_get(pathp->node, tag, pathp->offset)) 538 goto out; 539 tag_clear(pathp->node, tag, pathp->offset); 540 if (any_tag_set(pathp->node, tag)) 541 goto out; 542 pathp--; 543 } 544 545 /* clear the root's tag bit */ 546 if (root_tag_get(root, tag)) 547 root_tag_clear(root, tag); 548 549 out: 550 return slot; 551 } 552 EXPORT_SYMBOL(radix_tree_tag_clear); 553 554 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 555 /** 556 * radix_tree_tag_get - get a tag on a radix tree node 557 * @root: radix tree root 558 * @index: index key 559 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 560 * 561 * Return values: 562 * 563 * 0: tag not present or not set 564 * 1: tag set 565 */ 566 int radix_tree_tag_get(struct radix_tree_root *root, 567 unsigned long index, unsigned int tag) 568 { 569 unsigned int height, shift; 570 struct radix_tree_node *node; 571 int saw_unset_tag = 0; 572 573 /* check the root's tag bit */ 574 if (!root_tag_get(root, tag)) 575 return 0; 576 577 node = rcu_dereference(root->rnode); 578 if (node == NULL) 579 return 0; 580 581 if (!radix_tree_is_indirect_ptr(node)) 582 return (index == 0); 583 node = radix_tree_indirect_to_ptr(node); 584 585 height = node->height; 586 if (index > radix_tree_maxindex(height)) 587 return 0; 588 589 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 590 591 for ( ; ; ) { 592 int offset; 593 594 if (node == NULL) 595 return 0; 596 597 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 598 599 /* 600 * This is just a debug check. Later, we can bale as soon as 601 * we see an unset tag. 602 */ 603 if (!tag_get(node, tag, offset)) 604 saw_unset_tag = 1; 605 if (height == 1) { 606 int ret = tag_get(node, tag, offset); 607 608 BUG_ON(ret && saw_unset_tag); 609 return !!ret; 610 } 611 node = rcu_dereference(node->slots[offset]); 612 shift -= RADIX_TREE_MAP_SHIFT; 613 height--; 614 } 615 } 616 EXPORT_SYMBOL(radix_tree_tag_get); 617 #endif 618 619 /** 620 * radix_tree_next_hole - find the next hole (not-present entry) 621 * @root: tree root 622 * @index: index key 623 * @max_scan: maximum range to search 624 * 625 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest 626 * indexed hole. 627 * 628 * Returns: the index of the hole if found, otherwise returns an index 629 * outside of the set specified (in which case 'return - index >= max_scan' 630 * will be true). 631 * 632 * radix_tree_next_hole may be called under rcu_read_lock. However, like 633 * radix_tree_gang_lookup, this will not atomically search a snapshot of the 634 * tree at a single point in time. For example, if a hole is created at index 635 * 5, then subsequently a hole is created at index 10, radix_tree_next_hole 636 * covering both indexes may return 10 if called under rcu_read_lock. 637 */ 638 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 639 unsigned long index, unsigned long max_scan) 640 { 641 unsigned long i; 642 643 for (i = 0; i < max_scan; i++) { 644 if (!radix_tree_lookup(root, index)) 645 break; 646 index++; 647 if (index == 0) 648 break; 649 } 650 651 return index; 652 } 653 EXPORT_SYMBOL(radix_tree_next_hole); 654 655 static unsigned int 656 __lookup(struct radix_tree_node *slot, void **results, unsigned long index, 657 unsigned int max_items, unsigned long *next_index) 658 { 659 unsigned int nr_found = 0; 660 unsigned int shift, height; 661 unsigned long i; 662 663 height = slot->height; 664 if (height == 0) 665 goto out; 666 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 667 668 for ( ; height > 1; height--) { 669 i = (index >> shift) & RADIX_TREE_MAP_MASK; 670 for (;;) { 671 if (slot->slots[i] != NULL) 672 break; 673 index &= ~((1UL << shift) - 1); 674 index += 1UL << shift; 675 if (index == 0) 676 goto out; /* 32-bit wraparound */ 677 i++; 678 if (i == RADIX_TREE_MAP_SIZE) 679 goto out; 680 } 681 682 shift -= RADIX_TREE_MAP_SHIFT; 683 slot = rcu_dereference(slot->slots[i]); 684 if (slot == NULL) 685 goto out; 686 } 687 688 /* Bottom level: grab some items */ 689 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 690 struct radix_tree_node *node; 691 index++; 692 node = slot->slots[i]; 693 if (node) { 694 results[nr_found++] = rcu_dereference(node); 695 if (nr_found == max_items) 696 goto out; 697 } 698 } 699 out: 700 *next_index = index; 701 return nr_found; 702 } 703 704 /** 705 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 706 * @root: radix tree root 707 * @results: where the results of the lookup are placed 708 * @first_index: start the lookup from this key 709 * @max_items: place up to this many items at *results 710 * 711 * Performs an index-ascending scan of the tree for present items. Places 712 * them at *@results and returns the number of items which were placed at 713 * *@results. 714 * 715 * The implementation is naive. 716 * 717 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 718 * rcu_read_lock. In this case, rather than the returned results being 719 * an atomic snapshot of the tree at a single point in time, the semantics 720 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 721 * have been issued in individual locks, and results stored in 'results'. 722 */ 723 unsigned int 724 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 725 unsigned long first_index, unsigned int max_items) 726 { 727 unsigned long max_index; 728 struct radix_tree_node *node; 729 unsigned long cur_index = first_index; 730 unsigned int ret; 731 732 node = rcu_dereference(root->rnode); 733 if (!node) 734 return 0; 735 736 if (!radix_tree_is_indirect_ptr(node)) { 737 if (first_index > 0) 738 return 0; 739 results[0] = node; 740 return 1; 741 } 742 node = radix_tree_indirect_to_ptr(node); 743 744 max_index = radix_tree_maxindex(node->height); 745 746 ret = 0; 747 while (ret < max_items) { 748 unsigned int nr_found; 749 unsigned long next_index; /* Index of next search */ 750 751 if (cur_index > max_index) 752 break; 753 nr_found = __lookup(node, results + ret, cur_index, 754 max_items - ret, &next_index); 755 ret += nr_found; 756 if (next_index == 0) 757 break; 758 cur_index = next_index; 759 } 760 761 return ret; 762 } 763 EXPORT_SYMBOL(radix_tree_gang_lookup); 764 765 /* 766 * FIXME: the two tag_get()s here should use find_next_bit() instead of 767 * open-coding the search. 768 */ 769 static unsigned int 770 __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, 771 unsigned int max_items, unsigned long *next_index, unsigned int tag) 772 { 773 unsigned int nr_found = 0; 774 unsigned int shift, height; 775 776 height = slot->height; 777 if (height == 0) 778 goto out; 779 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 780 781 while (height > 0) { 782 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 783 784 for (;;) { 785 if (tag_get(slot, tag, i)) 786 break; 787 index &= ~((1UL << shift) - 1); 788 index += 1UL << shift; 789 if (index == 0) 790 goto out; /* 32-bit wraparound */ 791 i++; 792 if (i == RADIX_TREE_MAP_SIZE) 793 goto out; 794 } 795 height--; 796 if (height == 0) { /* Bottom level: grab some items */ 797 unsigned long j = index & RADIX_TREE_MAP_MASK; 798 799 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 800 struct radix_tree_node *node; 801 index++; 802 if (!tag_get(slot, tag, j)) 803 continue; 804 node = slot->slots[j]; 805 /* 806 * Even though the tag was found set, we need to 807 * recheck that we have a non-NULL node, because 808 * if this lookup is lockless, it may have been 809 * subsequently deleted. 810 * 811 * Similar care must be taken in any place that 812 * lookup ->slots[x] without a lock (ie. can't 813 * rely on its value remaining the same). 814 */ 815 if (node) { 816 node = rcu_dereference(node); 817 results[nr_found++] = node; 818 if (nr_found == max_items) 819 goto out; 820 } 821 } 822 } 823 shift -= RADIX_TREE_MAP_SHIFT; 824 slot = rcu_dereference(slot->slots[i]); 825 if (slot == NULL) 826 break; 827 } 828 out: 829 *next_index = index; 830 return nr_found; 831 } 832 833 /** 834 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 835 * based on a tag 836 * @root: radix tree root 837 * @results: where the results of the lookup are placed 838 * @first_index: start the lookup from this key 839 * @max_items: place up to this many items at *results 840 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 841 * 842 * Performs an index-ascending scan of the tree for present items which 843 * have the tag indexed by @tag set. Places the items at *@results and 844 * returns the number of items which were placed at *@results. 845 */ 846 unsigned int 847 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 848 unsigned long first_index, unsigned int max_items, 849 unsigned int tag) 850 { 851 struct radix_tree_node *node; 852 unsigned long max_index; 853 unsigned long cur_index = first_index; 854 unsigned int ret; 855 856 /* check the root's tag bit */ 857 if (!root_tag_get(root, tag)) 858 return 0; 859 860 node = rcu_dereference(root->rnode); 861 if (!node) 862 return 0; 863 864 if (!radix_tree_is_indirect_ptr(node)) { 865 if (first_index > 0) 866 return 0; 867 results[0] = node; 868 return 1; 869 } 870 node = radix_tree_indirect_to_ptr(node); 871 872 max_index = radix_tree_maxindex(node->height); 873 874 ret = 0; 875 while (ret < max_items) { 876 unsigned int nr_found; 877 unsigned long next_index; /* Index of next search */ 878 879 if (cur_index > max_index) 880 break; 881 nr_found = __lookup_tag(node, results + ret, cur_index, 882 max_items - ret, &next_index, tag); 883 ret += nr_found; 884 if (next_index == 0) 885 break; 886 cur_index = next_index; 887 } 888 889 return ret; 890 } 891 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 892 893 /** 894 * radix_tree_shrink - shrink height of a radix tree to minimal 895 * @root radix tree root 896 */ 897 static inline void radix_tree_shrink(struct radix_tree_root *root) 898 { 899 /* try to shrink tree height */ 900 while (root->height > 0) { 901 struct radix_tree_node *to_free = root->rnode; 902 void *newptr; 903 904 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 905 to_free = radix_tree_indirect_to_ptr(to_free); 906 907 /* 908 * The candidate node has more than one child, or its child 909 * is not at the leftmost slot, we cannot shrink. 910 */ 911 if (to_free->count != 1) 912 break; 913 if (!to_free->slots[0]) 914 break; 915 916 /* 917 * We don't need rcu_assign_pointer(), since we are simply 918 * moving the node from one part of the tree to another. If 919 * it was safe to dereference the old pointer to it 920 * (to_free->slots[0]), it will be safe to dereference the new 921 * one (root->rnode). 922 */ 923 newptr = to_free->slots[0]; 924 if (root->height > 1) 925 newptr = radix_tree_ptr_to_indirect(newptr); 926 root->rnode = newptr; 927 root->height--; 928 /* must only free zeroed nodes into the slab */ 929 tag_clear(to_free, 0, 0); 930 tag_clear(to_free, 1, 0); 931 to_free->slots[0] = NULL; 932 to_free->count = 0; 933 radix_tree_node_free(to_free); 934 } 935 } 936 937 /** 938 * radix_tree_delete - delete an item from a radix tree 939 * @root: radix tree root 940 * @index: index key 941 * 942 * Remove the item at @index from the radix tree rooted at @root. 943 * 944 * Returns the address of the deleted item, or NULL if it was not present. 945 */ 946 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 947 { 948 /* 949 * The radix tree path needs to be one longer than the maximum path 950 * since the "list" is null terminated. 951 */ 952 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 953 struct radix_tree_node *slot = NULL; 954 struct radix_tree_node *to_free; 955 unsigned int height, shift; 956 int tag; 957 int offset; 958 959 height = root->height; 960 if (index > radix_tree_maxindex(height)) 961 goto out; 962 963 slot = root->rnode; 964 if (height == 0) { 965 root_tag_clear_all(root); 966 root->rnode = NULL; 967 goto out; 968 } 969 slot = radix_tree_indirect_to_ptr(slot); 970 971 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 972 pathp->node = NULL; 973 974 do { 975 if (slot == NULL) 976 goto out; 977 978 pathp++; 979 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 980 pathp->offset = offset; 981 pathp->node = slot; 982 slot = slot->slots[offset]; 983 shift -= RADIX_TREE_MAP_SHIFT; 984 height--; 985 } while (height > 0); 986 987 if (slot == NULL) 988 goto out; 989 990 /* 991 * Clear all tags associated with the just-deleted item 992 */ 993 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 994 if (tag_get(pathp->node, tag, pathp->offset)) 995 radix_tree_tag_clear(root, index, tag); 996 } 997 998 to_free = NULL; 999 /* Now free the nodes we do not need anymore */ 1000 while (pathp->node) { 1001 pathp->node->slots[pathp->offset] = NULL; 1002 pathp->node->count--; 1003 /* 1004 * Queue the node for deferred freeing after the 1005 * last reference to it disappears (set NULL, above). 1006 */ 1007 if (to_free) 1008 radix_tree_node_free(to_free); 1009 1010 if (pathp->node->count) { 1011 if (pathp->node == 1012 radix_tree_indirect_to_ptr(root->rnode)) 1013 radix_tree_shrink(root); 1014 goto out; 1015 } 1016 1017 /* Node with zero slots in use so free it */ 1018 to_free = pathp->node; 1019 pathp--; 1020 1021 } 1022 root_tag_clear_all(root); 1023 root->height = 0; 1024 root->rnode = NULL; 1025 if (to_free) 1026 radix_tree_node_free(to_free); 1027 1028 out: 1029 return slot; 1030 } 1031 EXPORT_SYMBOL(radix_tree_delete); 1032 1033 /** 1034 * radix_tree_tagged - test whether any items in the tree are tagged 1035 * @root: radix tree root 1036 * @tag: tag to test 1037 */ 1038 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 1039 { 1040 return root_tag_get(root, tag); 1041 } 1042 EXPORT_SYMBOL(radix_tree_tagged); 1043 1044 static void 1045 radix_tree_node_ctor(struct kmem_cache *cachep, void *node) 1046 { 1047 memset(node, 0, sizeof(struct radix_tree_node)); 1048 } 1049 1050 static __init unsigned long __maxindex(unsigned int height) 1051 { 1052 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 1053 int shift = RADIX_TREE_INDEX_BITS - width; 1054 1055 if (shift < 0) 1056 return ~0UL; 1057 if (shift >= BITS_PER_LONG) 1058 return 0UL; 1059 return ~0UL >> shift; 1060 } 1061 1062 static __init void radix_tree_init_maxindex(void) 1063 { 1064 unsigned int i; 1065 1066 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 1067 height_to_maxindex[i] = __maxindex(i); 1068 } 1069 1070 static int radix_tree_callback(struct notifier_block *nfb, 1071 unsigned long action, 1072 void *hcpu) 1073 { 1074 int cpu = (long)hcpu; 1075 struct radix_tree_preload *rtp; 1076 1077 /* Free per-cpu pool of perloaded nodes */ 1078 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1079 rtp = &per_cpu(radix_tree_preloads, cpu); 1080 while (rtp->nr) { 1081 kmem_cache_free(radix_tree_node_cachep, 1082 rtp->nodes[rtp->nr-1]); 1083 rtp->nodes[rtp->nr-1] = NULL; 1084 rtp->nr--; 1085 } 1086 } 1087 return NOTIFY_OK; 1088 } 1089 1090 void __init radix_tree_init(void) 1091 { 1092 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1093 sizeof(struct radix_tree_node), 0, 1094 SLAB_PANIC, radix_tree_node_ctor); 1095 radix_tree_init_maxindex(); 1096 hotcpu_notifier(radix_tree_callback, 0); 1097 } 1098