1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 64 RADIX_TREE_MAP_SHIFT)) 65 66 /* 67 * The height_to_maxindex array needs to be one deeper than the maximum 68 * path as height 0 holds only 1 entry. 69 */ 70 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; 71 72 /* 73 * Radix tree node cache. 74 */ 75 static struct kmem_cache *radix_tree_node_cachep; 76 77 /* 78 * Per-cpu pool of preloaded nodes 79 */ 80 struct radix_tree_preload { 81 int nr; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 83 }; 84 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 85 86 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 87 { 88 return root->gfp_mask & __GFP_BITS_MASK; 89 } 90 91 /* 92 * This assumes that the caller has performed appropriate preallocation, and 93 * that the caller has pinned this thread of control to the current CPU. 94 */ 95 static struct radix_tree_node * 96 radix_tree_node_alloc(struct radix_tree_root *root) 97 { 98 struct radix_tree_node *ret = NULL; 99 gfp_t gfp_mask = root_gfp_mask(root); 100 101 if (!(gfp_mask & __GFP_WAIT)) { 102 struct radix_tree_preload *rtp; 103 104 /* 105 * Provided the caller has preloaded here, we will always 106 * succeed in getting a node here (and never reach 107 * kmem_cache_alloc) 108 */ 109 rtp = &__get_cpu_var(radix_tree_preloads); 110 if (rtp->nr) { 111 ret = rtp->nodes[rtp->nr - 1]; 112 rtp->nodes[rtp->nr - 1] = NULL; 113 rtp->nr--; 114 } 115 } 116 if (ret == NULL) 117 ret = kmem_cache_alloc(radix_tree_node_cachep, 118 set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); 119 120 BUG_ON(radix_tree_is_indirect_ptr(ret)); 121 return ret; 122 } 123 124 static void radix_tree_node_rcu_free(struct rcu_head *head) 125 { 126 struct radix_tree_node *node = 127 container_of(head, struct radix_tree_node, rcu_head); 128 kmem_cache_free(radix_tree_node_cachep, node); 129 } 130 131 static inline void 132 radix_tree_node_free(struct radix_tree_node *node) 133 { 134 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 135 } 136 137 /* 138 * Load up this CPU's radix_tree_node buffer with sufficient objects to 139 * ensure that the addition of a single element in the tree cannot fail. On 140 * success, return zero, with preemption disabled. On error, return -ENOMEM 141 * with preemption not disabled. 142 */ 143 int radix_tree_preload(gfp_t gfp_mask) 144 { 145 struct radix_tree_preload *rtp; 146 struct radix_tree_node *node; 147 int ret = -ENOMEM; 148 149 preempt_disable(); 150 rtp = &__get_cpu_var(radix_tree_preloads); 151 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 152 preempt_enable(); 153 node = kmem_cache_alloc(radix_tree_node_cachep, 154 set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); 155 if (node == NULL) 156 goto out; 157 preempt_disable(); 158 rtp = &__get_cpu_var(radix_tree_preloads); 159 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 160 rtp->nodes[rtp->nr++] = node; 161 else 162 kmem_cache_free(radix_tree_node_cachep, node); 163 } 164 ret = 0; 165 out: 166 return ret; 167 } 168 EXPORT_SYMBOL(radix_tree_preload); 169 170 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 171 int offset) 172 { 173 __set_bit(offset, node->tags[tag]); 174 } 175 176 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 177 int offset) 178 { 179 __clear_bit(offset, node->tags[tag]); 180 } 181 182 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 183 int offset) 184 { 185 return test_bit(offset, node->tags[tag]); 186 } 187 188 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 189 { 190 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 191 } 192 193 194 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 195 { 196 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 197 } 198 199 static inline void root_tag_clear_all(struct radix_tree_root *root) 200 { 201 root->gfp_mask &= __GFP_BITS_MASK; 202 } 203 204 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 205 { 206 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 207 } 208 209 /* 210 * Returns 1 if any slot in the node has this tag set. 211 * Otherwise returns 0. 212 */ 213 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 214 { 215 int idx; 216 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 217 if (node->tags[tag][idx]) 218 return 1; 219 } 220 return 0; 221 } 222 223 /* 224 * Return the maximum key which can be store into a 225 * radix tree with height HEIGHT. 226 */ 227 static inline unsigned long radix_tree_maxindex(unsigned int height) 228 { 229 return height_to_maxindex[height]; 230 } 231 232 /* 233 * Extend a radix tree so it can store key @index. 234 */ 235 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 236 { 237 struct radix_tree_node *node; 238 unsigned int height; 239 int tag; 240 241 /* Figure out what the height should be. */ 242 height = root->height + 1; 243 while (index > radix_tree_maxindex(height)) 244 height++; 245 246 if (root->rnode == NULL) { 247 root->height = height; 248 goto out; 249 } 250 251 do { 252 unsigned int newheight; 253 if (!(node = radix_tree_node_alloc(root))) 254 return -ENOMEM; 255 256 /* Increase the height. */ 257 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 258 259 /* Propagate the aggregated tag info into the new root */ 260 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 261 if (root_tag_get(root, tag)) 262 tag_set(node, tag, 0); 263 } 264 265 newheight = root->height+1; 266 node->height = newheight; 267 node->count = 1; 268 node = radix_tree_ptr_to_indirect(node); 269 rcu_assign_pointer(root->rnode, node); 270 root->height = newheight; 271 } while (height > root->height); 272 out: 273 return 0; 274 } 275 276 /** 277 * radix_tree_insert - insert into a radix tree 278 * @root: radix tree root 279 * @index: index key 280 * @item: item to insert 281 * 282 * Insert an item into the radix tree at position @index. 283 */ 284 int radix_tree_insert(struct radix_tree_root *root, 285 unsigned long index, void *item) 286 { 287 struct radix_tree_node *node = NULL, *slot; 288 unsigned int height, shift; 289 int offset; 290 int error; 291 292 BUG_ON(radix_tree_is_indirect_ptr(item)); 293 294 /* Make sure the tree is high enough. */ 295 if (index > radix_tree_maxindex(root->height)) { 296 error = radix_tree_extend(root, index); 297 if (error) 298 return error; 299 } 300 301 slot = radix_tree_indirect_to_ptr(root->rnode); 302 303 height = root->height; 304 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 305 306 offset = 0; /* uninitialised var warning */ 307 while (height > 0) { 308 if (slot == NULL) { 309 /* Have to add a child node. */ 310 if (!(slot = radix_tree_node_alloc(root))) 311 return -ENOMEM; 312 slot->height = height; 313 if (node) { 314 rcu_assign_pointer(node->slots[offset], slot); 315 node->count++; 316 } else 317 rcu_assign_pointer(root->rnode, 318 radix_tree_ptr_to_indirect(slot)); 319 } 320 321 /* Go a level down */ 322 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 323 node = slot; 324 slot = node->slots[offset]; 325 shift -= RADIX_TREE_MAP_SHIFT; 326 height--; 327 } 328 329 if (slot != NULL) 330 return -EEXIST; 331 332 if (node) { 333 node->count++; 334 rcu_assign_pointer(node->slots[offset], item); 335 BUG_ON(tag_get(node, 0, offset)); 336 BUG_ON(tag_get(node, 1, offset)); 337 } else { 338 rcu_assign_pointer(root->rnode, item); 339 BUG_ON(root_tag_get(root, 0)); 340 BUG_ON(root_tag_get(root, 1)); 341 } 342 343 return 0; 344 } 345 EXPORT_SYMBOL(radix_tree_insert); 346 347 /** 348 * radix_tree_lookup_slot - lookup a slot in a radix tree 349 * @root: radix tree root 350 * @index: index key 351 * 352 * Returns: the slot corresponding to the position @index in the 353 * radix tree @root. This is useful for update-if-exists operations. 354 * 355 * This function cannot be called under rcu_read_lock, it must be 356 * excluded from writers, as must the returned slot for subsequent 357 * use by radix_tree_deref_slot() and radix_tree_replace slot. 358 * Caller must hold tree write locked across slot lookup and 359 * replace. 360 */ 361 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 362 { 363 unsigned int height, shift; 364 struct radix_tree_node *node, **slot; 365 366 node = root->rnode; 367 if (node == NULL) 368 return NULL; 369 370 if (!radix_tree_is_indirect_ptr(node)) { 371 if (index > 0) 372 return NULL; 373 return (void **)&root->rnode; 374 } 375 node = radix_tree_indirect_to_ptr(node); 376 377 height = node->height; 378 if (index > radix_tree_maxindex(height)) 379 return NULL; 380 381 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 382 383 do { 384 slot = (struct radix_tree_node **) 385 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 386 node = *slot; 387 if (node == NULL) 388 return NULL; 389 390 shift -= RADIX_TREE_MAP_SHIFT; 391 height--; 392 } while (height > 0); 393 394 return (void **)slot; 395 } 396 EXPORT_SYMBOL(radix_tree_lookup_slot); 397 398 /** 399 * radix_tree_lookup - perform lookup operation on a radix tree 400 * @root: radix tree root 401 * @index: index key 402 * 403 * Lookup the item at the position @index in the radix tree @root. 404 * 405 * This function can be called under rcu_read_lock, however the caller 406 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 407 * them safely). No RCU barriers are required to access or modify the 408 * returned item, however. 409 */ 410 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 411 { 412 unsigned int height, shift; 413 struct radix_tree_node *node, **slot; 414 415 node = rcu_dereference(root->rnode); 416 if (node == NULL) 417 return NULL; 418 419 if (!radix_tree_is_indirect_ptr(node)) { 420 if (index > 0) 421 return NULL; 422 return node; 423 } 424 node = radix_tree_indirect_to_ptr(node); 425 426 height = node->height; 427 if (index > radix_tree_maxindex(height)) 428 return NULL; 429 430 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 431 432 do { 433 slot = (struct radix_tree_node **) 434 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 435 node = rcu_dereference(*slot); 436 if (node == NULL) 437 return NULL; 438 439 shift -= RADIX_TREE_MAP_SHIFT; 440 height--; 441 } while (height > 0); 442 443 return node; 444 } 445 EXPORT_SYMBOL(radix_tree_lookup); 446 447 /** 448 * radix_tree_tag_set - set a tag on a radix tree node 449 * @root: radix tree root 450 * @index: index key 451 * @tag: tag index 452 * 453 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 454 * corresponding to @index in the radix tree. From 455 * the root all the way down to the leaf node. 456 * 457 * Returns the address of the tagged item. Setting a tag on a not-present 458 * item is a bug. 459 */ 460 void *radix_tree_tag_set(struct radix_tree_root *root, 461 unsigned long index, unsigned int tag) 462 { 463 unsigned int height, shift; 464 struct radix_tree_node *slot; 465 466 height = root->height; 467 BUG_ON(index > radix_tree_maxindex(height)); 468 469 slot = radix_tree_indirect_to_ptr(root->rnode); 470 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 471 472 while (height > 0) { 473 int offset; 474 475 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 476 if (!tag_get(slot, tag, offset)) 477 tag_set(slot, tag, offset); 478 slot = slot->slots[offset]; 479 BUG_ON(slot == NULL); 480 shift -= RADIX_TREE_MAP_SHIFT; 481 height--; 482 } 483 484 /* set the root's tag bit */ 485 if (slot && !root_tag_get(root, tag)) 486 root_tag_set(root, tag); 487 488 return slot; 489 } 490 EXPORT_SYMBOL(radix_tree_tag_set); 491 492 /** 493 * radix_tree_tag_clear - clear a tag on a radix tree node 494 * @root: radix tree root 495 * @index: index key 496 * @tag: tag index 497 * 498 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 499 * corresponding to @index in the radix tree. If 500 * this causes the leaf node to have no tags set then clear the tag in the 501 * next-to-leaf node, etc. 502 * 503 * Returns the address of the tagged item on success, else NULL. ie: 504 * has the same return value and semantics as radix_tree_lookup(). 505 */ 506 void *radix_tree_tag_clear(struct radix_tree_root *root, 507 unsigned long index, unsigned int tag) 508 { 509 /* 510 * The radix tree path needs to be one longer than the maximum path 511 * since the "list" is null terminated. 512 */ 513 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 514 struct radix_tree_node *slot = NULL; 515 unsigned int height, shift; 516 517 height = root->height; 518 if (index > radix_tree_maxindex(height)) 519 goto out; 520 521 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 522 pathp->node = NULL; 523 slot = radix_tree_indirect_to_ptr(root->rnode); 524 525 while (height > 0) { 526 int offset; 527 528 if (slot == NULL) 529 goto out; 530 531 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 532 pathp[1].offset = offset; 533 pathp[1].node = slot; 534 slot = slot->slots[offset]; 535 pathp++; 536 shift -= RADIX_TREE_MAP_SHIFT; 537 height--; 538 } 539 540 if (slot == NULL) 541 goto out; 542 543 while (pathp->node) { 544 if (!tag_get(pathp->node, tag, pathp->offset)) 545 goto out; 546 tag_clear(pathp->node, tag, pathp->offset); 547 if (any_tag_set(pathp->node, tag)) 548 goto out; 549 pathp--; 550 } 551 552 /* clear the root's tag bit */ 553 if (root_tag_get(root, tag)) 554 root_tag_clear(root, tag); 555 556 out: 557 return slot; 558 } 559 EXPORT_SYMBOL(radix_tree_tag_clear); 560 561 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 562 /** 563 * radix_tree_tag_get - get a tag on a radix tree node 564 * @root: radix tree root 565 * @index: index key 566 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 567 * 568 * Return values: 569 * 570 * 0: tag not present or not set 571 * 1: tag set 572 */ 573 int radix_tree_tag_get(struct radix_tree_root *root, 574 unsigned long index, unsigned int tag) 575 { 576 unsigned int height, shift; 577 struct radix_tree_node *node; 578 int saw_unset_tag = 0; 579 580 /* check the root's tag bit */ 581 if (!root_tag_get(root, tag)) 582 return 0; 583 584 node = rcu_dereference(root->rnode); 585 if (node == NULL) 586 return 0; 587 588 if (!radix_tree_is_indirect_ptr(node)) 589 return (index == 0); 590 node = radix_tree_indirect_to_ptr(node); 591 592 height = node->height; 593 if (index > radix_tree_maxindex(height)) 594 return 0; 595 596 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 597 598 for ( ; ; ) { 599 int offset; 600 601 if (node == NULL) 602 return 0; 603 604 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 605 606 /* 607 * This is just a debug check. Later, we can bale as soon as 608 * we see an unset tag. 609 */ 610 if (!tag_get(node, tag, offset)) 611 saw_unset_tag = 1; 612 if (height == 1) { 613 int ret = tag_get(node, tag, offset); 614 615 BUG_ON(ret && saw_unset_tag); 616 return !!ret; 617 } 618 node = rcu_dereference(node->slots[offset]); 619 shift -= RADIX_TREE_MAP_SHIFT; 620 height--; 621 } 622 } 623 EXPORT_SYMBOL(radix_tree_tag_get); 624 #endif 625 626 /** 627 * radix_tree_next_hole - find the next hole (not-present entry) 628 * @root: tree root 629 * @index: index key 630 * @max_scan: maximum range to search 631 * 632 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest 633 * indexed hole. 634 * 635 * Returns: the index of the hole if found, otherwise returns an index 636 * outside of the set specified (in which case 'return - index >= max_scan' 637 * will be true). 638 * 639 * radix_tree_next_hole may be called under rcu_read_lock. However, like 640 * radix_tree_gang_lookup, this will not atomically search a snapshot of the 641 * tree at a single point in time. For example, if a hole is created at index 642 * 5, then subsequently a hole is created at index 10, radix_tree_next_hole 643 * covering both indexes may return 10 if called under rcu_read_lock. 644 */ 645 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 646 unsigned long index, unsigned long max_scan) 647 { 648 unsigned long i; 649 650 for (i = 0; i < max_scan; i++) { 651 if (!radix_tree_lookup(root, index)) 652 break; 653 index++; 654 if (index == 0) 655 break; 656 } 657 658 return index; 659 } 660 EXPORT_SYMBOL(radix_tree_next_hole); 661 662 static unsigned int 663 __lookup(struct radix_tree_node *slot, void **results, unsigned long index, 664 unsigned int max_items, unsigned long *next_index) 665 { 666 unsigned int nr_found = 0; 667 unsigned int shift, height; 668 unsigned long i; 669 670 height = slot->height; 671 if (height == 0) 672 goto out; 673 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 674 675 for ( ; height > 1; height--) { 676 i = (index >> shift) & RADIX_TREE_MAP_MASK; 677 for (;;) { 678 if (slot->slots[i] != NULL) 679 break; 680 index &= ~((1UL << shift) - 1); 681 index += 1UL << shift; 682 if (index == 0) 683 goto out; /* 32-bit wraparound */ 684 i++; 685 if (i == RADIX_TREE_MAP_SIZE) 686 goto out; 687 } 688 689 shift -= RADIX_TREE_MAP_SHIFT; 690 slot = rcu_dereference(slot->slots[i]); 691 if (slot == NULL) 692 goto out; 693 } 694 695 /* Bottom level: grab some items */ 696 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 697 struct radix_tree_node *node; 698 index++; 699 node = slot->slots[i]; 700 if (node) { 701 results[nr_found++] = rcu_dereference(node); 702 if (nr_found == max_items) 703 goto out; 704 } 705 } 706 out: 707 *next_index = index; 708 return nr_found; 709 } 710 711 /** 712 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 713 * @root: radix tree root 714 * @results: where the results of the lookup are placed 715 * @first_index: start the lookup from this key 716 * @max_items: place up to this many items at *results 717 * 718 * Performs an index-ascending scan of the tree for present items. Places 719 * them at *@results and returns the number of items which were placed at 720 * *@results. 721 * 722 * The implementation is naive. 723 * 724 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 725 * rcu_read_lock. In this case, rather than the returned results being 726 * an atomic snapshot of the tree at a single point in time, the semantics 727 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 728 * have been issued in individual locks, and results stored in 'results'. 729 */ 730 unsigned int 731 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 732 unsigned long first_index, unsigned int max_items) 733 { 734 unsigned long max_index; 735 struct radix_tree_node *node; 736 unsigned long cur_index = first_index; 737 unsigned int ret; 738 739 node = rcu_dereference(root->rnode); 740 if (!node) 741 return 0; 742 743 if (!radix_tree_is_indirect_ptr(node)) { 744 if (first_index > 0) 745 return 0; 746 results[0] = node; 747 return 1; 748 } 749 node = radix_tree_indirect_to_ptr(node); 750 751 max_index = radix_tree_maxindex(node->height); 752 753 ret = 0; 754 while (ret < max_items) { 755 unsigned int nr_found; 756 unsigned long next_index; /* Index of next search */ 757 758 if (cur_index > max_index) 759 break; 760 nr_found = __lookup(node, results + ret, cur_index, 761 max_items - ret, &next_index); 762 ret += nr_found; 763 if (next_index == 0) 764 break; 765 cur_index = next_index; 766 } 767 768 return ret; 769 } 770 EXPORT_SYMBOL(radix_tree_gang_lookup); 771 772 /* 773 * FIXME: the two tag_get()s here should use find_next_bit() instead of 774 * open-coding the search. 775 */ 776 static unsigned int 777 __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, 778 unsigned int max_items, unsigned long *next_index, unsigned int tag) 779 { 780 unsigned int nr_found = 0; 781 unsigned int shift, height; 782 783 height = slot->height; 784 if (height == 0) 785 goto out; 786 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 787 788 while (height > 0) { 789 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 790 791 for (;;) { 792 if (tag_get(slot, tag, i)) 793 break; 794 index &= ~((1UL << shift) - 1); 795 index += 1UL << shift; 796 if (index == 0) 797 goto out; /* 32-bit wraparound */ 798 i++; 799 if (i == RADIX_TREE_MAP_SIZE) 800 goto out; 801 } 802 height--; 803 if (height == 0) { /* Bottom level: grab some items */ 804 unsigned long j = index & RADIX_TREE_MAP_MASK; 805 806 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 807 struct radix_tree_node *node; 808 index++; 809 if (!tag_get(slot, tag, j)) 810 continue; 811 node = slot->slots[j]; 812 /* 813 * Even though the tag was found set, we need to 814 * recheck that we have a non-NULL node, because 815 * if this lookup is lockless, it may have been 816 * subsequently deleted. 817 * 818 * Similar care must be taken in any place that 819 * lookup ->slots[x] without a lock (ie. can't 820 * rely on its value remaining the same). 821 */ 822 if (node) { 823 node = rcu_dereference(node); 824 results[nr_found++] = node; 825 if (nr_found == max_items) 826 goto out; 827 } 828 } 829 } 830 shift -= RADIX_TREE_MAP_SHIFT; 831 slot = rcu_dereference(slot->slots[i]); 832 if (slot == NULL) 833 break; 834 } 835 out: 836 *next_index = index; 837 return nr_found; 838 } 839 840 /** 841 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 842 * based on a tag 843 * @root: radix tree root 844 * @results: where the results of the lookup are placed 845 * @first_index: start the lookup from this key 846 * @max_items: place up to this many items at *results 847 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 848 * 849 * Performs an index-ascending scan of the tree for present items which 850 * have the tag indexed by @tag set. Places the items at *@results and 851 * returns the number of items which were placed at *@results. 852 */ 853 unsigned int 854 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 855 unsigned long first_index, unsigned int max_items, 856 unsigned int tag) 857 { 858 struct radix_tree_node *node; 859 unsigned long max_index; 860 unsigned long cur_index = first_index; 861 unsigned int ret; 862 863 /* check the root's tag bit */ 864 if (!root_tag_get(root, tag)) 865 return 0; 866 867 node = rcu_dereference(root->rnode); 868 if (!node) 869 return 0; 870 871 if (!radix_tree_is_indirect_ptr(node)) { 872 if (first_index > 0) 873 return 0; 874 results[0] = node; 875 return 1; 876 } 877 node = radix_tree_indirect_to_ptr(node); 878 879 max_index = radix_tree_maxindex(node->height); 880 881 ret = 0; 882 while (ret < max_items) { 883 unsigned int nr_found; 884 unsigned long next_index; /* Index of next search */ 885 886 if (cur_index > max_index) 887 break; 888 nr_found = __lookup_tag(node, results + ret, cur_index, 889 max_items - ret, &next_index, tag); 890 ret += nr_found; 891 if (next_index == 0) 892 break; 893 cur_index = next_index; 894 } 895 896 return ret; 897 } 898 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 899 900 /** 901 * radix_tree_shrink - shrink height of a radix tree to minimal 902 * @root radix tree root 903 */ 904 static inline void radix_tree_shrink(struct radix_tree_root *root) 905 { 906 /* try to shrink tree height */ 907 while (root->height > 0) { 908 struct radix_tree_node *to_free = root->rnode; 909 void *newptr; 910 911 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 912 to_free = radix_tree_indirect_to_ptr(to_free); 913 914 /* 915 * The candidate node has more than one child, or its child 916 * is not at the leftmost slot, we cannot shrink. 917 */ 918 if (to_free->count != 1) 919 break; 920 if (!to_free->slots[0]) 921 break; 922 923 /* 924 * We don't need rcu_assign_pointer(), since we are simply 925 * moving the node from one part of the tree to another. If 926 * it was safe to dereference the old pointer to it 927 * (to_free->slots[0]), it will be safe to dereference the new 928 * one (root->rnode). 929 */ 930 newptr = to_free->slots[0]; 931 if (root->height > 1) 932 newptr = radix_tree_ptr_to_indirect(newptr); 933 root->rnode = newptr; 934 root->height--; 935 /* must only free zeroed nodes into the slab */ 936 tag_clear(to_free, 0, 0); 937 tag_clear(to_free, 1, 0); 938 to_free->slots[0] = NULL; 939 to_free->count = 0; 940 radix_tree_node_free(to_free); 941 } 942 } 943 944 /** 945 * radix_tree_delete - delete an item from a radix tree 946 * @root: radix tree root 947 * @index: index key 948 * 949 * Remove the item at @index from the radix tree rooted at @root. 950 * 951 * Returns the address of the deleted item, or NULL if it was not present. 952 */ 953 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 954 { 955 /* 956 * The radix tree path needs to be one longer than the maximum path 957 * since the "list" is null terminated. 958 */ 959 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 960 struct radix_tree_node *slot = NULL; 961 struct radix_tree_node *to_free; 962 unsigned int height, shift; 963 int tag; 964 int offset; 965 966 height = root->height; 967 if (index > radix_tree_maxindex(height)) 968 goto out; 969 970 slot = root->rnode; 971 if (height == 0) { 972 root_tag_clear_all(root); 973 root->rnode = NULL; 974 goto out; 975 } 976 slot = radix_tree_indirect_to_ptr(slot); 977 978 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 979 pathp->node = NULL; 980 981 do { 982 if (slot == NULL) 983 goto out; 984 985 pathp++; 986 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 987 pathp->offset = offset; 988 pathp->node = slot; 989 slot = slot->slots[offset]; 990 shift -= RADIX_TREE_MAP_SHIFT; 991 height--; 992 } while (height > 0); 993 994 if (slot == NULL) 995 goto out; 996 997 /* 998 * Clear all tags associated with the just-deleted item 999 */ 1000 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 1001 if (tag_get(pathp->node, tag, pathp->offset)) 1002 radix_tree_tag_clear(root, index, tag); 1003 } 1004 1005 to_free = NULL; 1006 /* Now free the nodes we do not need anymore */ 1007 while (pathp->node) { 1008 pathp->node->slots[pathp->offset] = NULL; 1009 pathp->node->count--; 1010 /* 1011 * Queue the node for deferred freeing after the 1012 * last reference to it disappears (set NULL, above). 1013 */ 1014 if (to_free) 1015 radix_tree_node_free(to_free); 1016 1017 if (pathp->node->count) { 1018 if (pathp->node == 1019 radix_tree_indirect_to_ptr(root->rnode)) 1020 radix_tree_shrink(root); 1021 goto out; 1022 } 1023 1024 /* Node with zero slots in use so free it */ 1025 to_free = pathp->node; 1026 pathp--; 1027 1028 } 1029 root_tag_clear_all(root); 1030 root->height = 0; 1031 root->rnode = NULL; 1032 if (to_free) 1033 radix_tree_node_free(to_free); 1034 1035 out: 1036 return slot; 1037 } 1038 EXPORT_SYMBOL(radix_tree_delete); 1039 1040 /** 1041 * radix_tree_tagged - test whether any items in the tree are tagged 1042 * @root: radix tree root 1043 * @tag: tag to test 1044 */ 1045 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 1046 { 1047 return root_tag_get(root, tag); 1048 } 1049 EXPORT_SYMBOL(radix_tree_tagged); 1050 1051 static void 1052 radix_tree_node_ctor(struct kmem_cache *cachep, void *node) 1053 { 1054 memset(node, 0, sizeof(struct radix_tree_node)); 1055 } 1056 1057 static __init unsigned long __maxindex(unsigned int height) 1058 { 1059 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 1060 int shift = RADIX_TREE_INDEX_BITS - width; 1061 1062 if (shift < 0) 1063 return ~0UL; 1064 if (shift >= BITS_PER_LONG) 1065 return 0UL; 1066 return ~0UL >> shift; 1067 } 1068 1069 static __init void radix_tree_init_maxindex(void) 1070 { 1071 unsigned int i; 1072 1073 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 1074 height_to_maxindex[i] = __maxindex(i); 1075 } 1076 1077 static int radix_tree_callback(struct notifier_block *nfb, 1078 unsigned long action, 1079 void *hcpu) 1080 { 1081 int cpu = (long)hcpu; 1082 struct radix_tree_preload *rtp; 1083 1084 /* Free per-cpu pool of perloaded nodes */ 1085 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1086 rtp = &per_cpu(radix_tree_preloads, cpu); 1087 while (rtp->nr) { 1088 kmem_cache_free(radix_tree_node_cachep, 1089 rtp->nodes[rtp->nr-1]); 1090 rtp->nodes[rtp->nr-1] = NULL; 1091 rtp->nr--; 1092 } 1093 } 1094 return NOTIFY_OK; 1095 } 1096 1097 void __init radix_tree_init(void) 1098 { 1099 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1100 sizeof(struct radix_tree_node), 0, 1101 SLAB_PANIC, radix_tree_node_ctor); 1102 radix_tree_init_maxindex(); 1103 hotcpu_notifier(radix_tree_callback, 0); 1104 } 1105