1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 64 RADIX_TREE_MAP_SHIFT)) 65 66 /* 67 * The height_to_maxindex array needs to be one deeper than the maximum 68 * path as height 0 holds only 1 entry. 69 */ 70 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; 71 72 /* 73 * Radix tree node cache. 74 */ 75 static struct kmem_cache *radix_tree_node_cachep; 76 77 /* 78 * Per-cpu pool of preloaded nodes 79 */ 80 struct radix_tree_preload { 81 int nr; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 83 }; 84 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 85 86 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 87 { 88 return root->gfp_mask & __GFP_BITS_MASK; 89 } 90 91 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 92 int offset) 93 { 94 __set_bit(offset, node->tags[tag]); 95 } 96 97 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 98 int offset) 99 { 100 __clear_bit(offset, node->tags[tag]); 101 } 102 103 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 104 int offset) 105 { 106 return test_bit(offset, node->tags[tag]); 107 } 108 109 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 110 { 111 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 112 } 113 114 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 115 { 116 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 117 } 118 119 static inline void root_tag_clear_all(struct radix_tree_root *root) 120 { 121 root->gfp_mask &= __GFP_BITS_MASK; 122 } 123 124 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 125 { 126 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 127 } 128 129 /* 130 * Returns 1 if any slot in the node has this tag set. 131 * Otherwise returns 0. 132 */ 133 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 134 { 135 int idx; 136 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 137 if (node->tags[tag][idx]) 138 return 1; 139 } 140 return 0; 141 } 142 /* 143 * This assumes that the caller has performed appropriate preallocation, and 144 * that the caller has pinned this thread of control to the current CPU. 145 */ 146 static struct radix_tree_node * 147 radix_tree_node_alloc(struct radix_tree_root *root) 148 { 149 struct radix_tree_node *ret = NULL; 150 gfp_t gfp_mask = root_gfp_mask(root); 151 152 if (!(gfp_mask & __GFP_WAIT)) { 153 struct radix_tree_preload *rtp; 154 155 /* 156 * Provided the caller has preloaded here, we will always 157 * succeed in getting a node here (and never reach 158 * kmem_cache_alloc) 159 */ 160 rtp = &__get_cpu_var(radix_tree_preloads); 161 if (rtp->nr) { 162 ret = rtp->nodes[rtp->nr - 1]; 163 rtp->nodes[rtp->nr - 1] = NULL; 164 rtp->nr--; 165 } 166 } 167 if (ret == NULL) 168 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 169 170 BUG_ON(radix_tree_is_indirect_ptr(ret)); 171 return ret; 172 } 173 174 static void radix_tree_node_rcu_free(struct rcu_head *head) 175 { 176 struct radix_tree_node *node = 177 container_of(head, struct radix_tree_node, rcu_head); 178 179 /* 180 * must only free zeroed nodes into the slab. radix_tree_shrink 181 * can leave us with a non-NULL entry in the first slot, so clear 182 * that here to make sure. 183 */ 184 tag_clear(node, 0, 0); 185 tag_clear(node, 1, 0); 186 node->slots[0] = NULL; 187 node->count = 0; 188 189 kmem_cache_free(radix_tree_node_cachep, node); 190 } 191 192 static inline void 193 radix_tree_node_free(struct radix_tree_node *node) 194 { 195 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 196 } 197 198 /* 199 * Load up this CPU's radix_tree_node buffer with sufficient objects to 200 * ensure that the addition of a single element in the tree cannot fail. On 201 * success, return zero, with preemption disabled. On error, return -ENOMEM 202 * with preemption not disabled. 203 */ 204 int radix_tree_preload(gfp_t gfp_mask) 205 { 206 struct radix_tree_preload *rtp; 207 struct radix_tree_node *node; 208 int ret = -ENOMEM; 209 210 preempt_disable(); 211 rtp = &__get_cpu_var(radix_tree_preloads); 212 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 213 preempt_enable(); 214 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 215 if (node == NULL) 216 goto out; 217 preempt_disable(); 218 rtp = &__get_cpu_var(radix_tree_preloads); 219 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 220 rtp->nodes[rtp->nr++] = node; 221 else 222 kmem_cache_free(radix_tree_node_cachep, node); 223 } 224 ret = 0; 225 out: 226 return ret; 227 } 228 EXPORT_SYMBOL(radix_tree_preload); 229 230 /* 231 * Return the maximum key which can be store into a 232 * radix tree with height HEIGHT. 233 */ 234 static inline unsigned long radix_tree_maxindex(unsigned int height) 235 { 236 return height_to_maxindex[height]; 237 } 238 239 /* 240 * Extend a radix tree so it can store key @index. 241 */ 242 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 243 { 244 struct radix_tree_node *node; 245 unsigned int height; 246 int tag; 247 248 /* Figure out what the height should be. */ 249 height = root->height + 1; 250 while (index > radix_tree_maxindex(height)) 251 height++; 252 253 if (root->rnode == NULL) { 254 root->height = height; 255 goto out; 256 } 257 258 do { 259 unsigned int newheight; 260 if (!(node = radix_tree_node_alloc(root))) 261 return -ENOMEM; 262 263 /* Increase the height. */ 264 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 265 266 /* Propagate the aggregated tag info into the new root */ 267 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 268 if (root_tag_get(root, tag)) 269 tag_set(node, tag, 0); 270 } 271 272 newheight = root->height+1; 273 node->height = newheight; 274 node->count = 1; 275 node = radix_tree_ptr_to_indirect(node); 276 rcu_assign_pointer(root->rnode, node); 277 root->height = newheight; 278 } while (height > root->height); 279 out: 280 return 0; 281 } 282 283 /** 284 * radix_tree_insert - insert into a radix tree 285 * @root: radix tree root 286 * @index: index key 287 * @item: item to insert 288 * 289 * Insert an item into the radix tree at position @index. 290 */ 291 int radix_tree_insert(struct radix_tree_root *root, 292 unsigned long index, void *item) 293 { 294 struct radix_tree_node *node = NULL, *slot; 295 unsigned int height, shift; 296 int offset; 297 int error; 298 299 BUG_ON(radix_tree_is_indirect_ptr(item)); 300 301 /* Make sure the tree is high enough. */ 302 if (index > radix_tree_maxindex(root->height)) { 303 error = radix_tree_extend(root, index); 304 if (error) 305 return error; 306 } 307 308 slot = radix_tree_indirect_to_ptr(root->rnode); 309 310 height = root->height; 311 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 312 313 offset = 0; /* uninitialised var warning */ 314 while (height > 0) { 315 if (slot == NULL) { 316 /* Have to add a child node. */ 317 if (!(slot = radix_tree_node_alloc(root))) 318 return -ENOMEM; 319 slot->height = height; 320 if (node) { 321 rcu_assign_pointer(node->slots[offset], slot); 322 node->count++; 323 } else 324 rcu_assign_pointer(root->rnode, 325 radix_tree_ptr_to_indirect(slot)); 326 } 327 328 /* Go a level down */ 329 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 330 node = slot; 331 slot = node->slots[offset]; 332 shift -= RADIX_TREE_MAP_SHIFT; 333 height--; 334 } 335 336 if (slot != NULL) 337 return -EEXIST; 338 339 if (node) { 340 node->count++; 341 rcu_assign_pointer(node->slots[offset], item); 342 BUG_ON(tag_get(node, 0, offset)); 343 BUG_ON(tag_get(node, 1, offset)); 344 } else { 345 rcu_assign_pointer(root->rnode, item); 346 BUG_ON(root_tag_get(root, 0)); 347 BUG_ON(root_tag_get(root, 1)); 348 } 349 350 return 0; 351 } 352 EXPORT_SYMBOL(radix_tree_insert); 353 354 /** 355 * radix_tree_lookup_slot - lookup a slot in a radix tree 356 * @root: radix tree root 357 * @index: index key 358 * 359 * Returns: the slot corresponding to the position @index in the 360 * radix tree @root. This is useful for update-if-exists operations. 361 * 362 * This function can be called under rcu_read_lock iff the slot is not 363 * modified by radix_tree_replace_slot, otherwise it must be called 364 * exclusive from other writers. Any dereference of the slot must be done 365 * using radix_tree_deref_slot. 366 */ 367 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 368 { 369 unsigned int height, shift; 370 struct radix_tree_node *node, **slot; 371 372 node = rcu_dereference(root->rnode); 373 if (node == NULL) 374 return NULL; 375 376 if (!radix_tree_is_indirect_ptr(node)) { 377 if (index > 0) 378 return NULL; 379 return (void **)&root->rnode; 380 } 381 node = radix_tree_indirect_to_ptr(node); 382 383 height = node->height; 384 if (index > radix_tree_maxindex(height)) 385 return NULL; 386 387 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 388 389 do { 390 slot = (struct radix_tree_node **) 391 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 392 node = rcu_dereference(*slot); 393 if (node == NULL) 394 return NULL; 395 396 shift -= RADIX_TREE_MAP_SHIFT; 397 height--; 398 } while (height > 0); 399 400 return (void **)slot; 401 } 402 EXPORT_SYMBOL(radix_tree_lookup_slot); 403 404 /** 405 * radix_tree_lookup - perform lookup operation on a radix tree 406 * @root: radix tree root 407 * @index: index key 408 * 409 * Lookup the item at the position @index in the radix tree @root. 410 * 411 * This function can be called under rcu_read_lock, however the caller 412 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 413 * them safely). No RCU barriers are required to access or modify the 414 * returned item, however. 415 */ 416 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 417 { 418 unsigned int height, shift; 419 struct radix_tree_node *node, **slot; 420 421 node = rcu_dereference(root->rnode); 422 if (node == NULL) 423 return NULL; 424 425 if (!radix_tree_is_indirect_ptr(node)) { 426 if (index > 0) 427 return NULL; 428 return node; 429 } 430 node = radix_tree_indirect_to_ptr(node); 431 432 height = node->height; 433 if (index > radix_tree_maxindex(height)) 434 return NULL; 435 436 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 437 438 do { 439 slot = (struct radix_tree_node **) 440 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 441 node = rcu_dereference(*slot); 442 if (node == NULL) 443 return NULL; 444 445 shift -= RADIX_TREE_MAP_SHIFT; 446 height--; 447 } while (height > 0); 448 449 return node; 450 } 451 EXPORT_SYMBOL(radix_tree_lookup); 452 453 /** 454 * radix_tree_tag_set - set a tag on a radix tree node 455 * @root: radix tree root 456 * @index: index key 457 * @tag: tag index 458 * 459 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 460 * corresponding to @index in the radix tree. From 461 * the root all the way down to the leaf node. 462 * 463 * Returns the address of the tagged item. Setting a tag on a not-present 464 * item is a bug. 465 */ 466 void *radix_tree_tag_set(struct radix_tree_root *root, 467 unsigned long index, unsigned int tag) 468 { 469 unsigned int height, shift; 470 struct radix_tree_node *slot; 471 472 height = root->height; 473 BUG_ON(index > radix_tree_maxindex(height)); 474 475 slot = radix_tree_indirect_to_ptr(root->rnode); 476 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 477 478 while (height > 0) { 479 int offset; 480 481 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 482 if (!tag_get(slot, tag, offset)) 483 tag_set(slot, tag, offset); 484 slot = slot->slots[offset]; 485 BUG_ON(slot == NULL); 486 shift -= RADIX_TREE_MAP_SHIFT; 487 height--; 488 } 489 490 /* set the root's tag bit */ 491 if (slot && !root_tag_get(root, tag)) 492 root_tag_set(root, tag); 493 494 return slot; 495 } 496 EXPORT_SYMBOL(radix_tree_tag_set); 497 498 /** 499 * radix_tree_tag_clear - clear a tag on a radix tree node 500 * @root: radix tree root 501 * @index: index key 502 * @tag: tag index 503 * 504 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 505 * corresponding to @index in the radix tree. If 506 * this causes the leaf node to have no tags set then clear the tag in the 507 * next-to-leaf node, etc. 508 * 509 * Returns the address of the tagged item on success, else NULL. ie: 510 * has the same return value and semantics as radix_tree_lookup(). 511 */ 512 void *radix_tree_tag_clear(struct radix_tree_root *root, 513 unsigned long index, unsigned int tag) 514 { 515 /* 516 * The radix tree path needs to be one longer than the maximum path 517 * since the "list" is null terminated. 518 */ 519 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 520 struct radix_tree_node *slot = NULL; 521 unsigned int height, shift; 522 523 height = root->height; 524 if (index > radix_tree_maxindex(height)) 525 goto out; 526 527 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 528 pathp->node = NULL; 529 slot = radix_tree_indirect_to_ptr(root->rnode); 530 531 while (height > 0) { 532 int offset; 533 534 if (slot == NULL) 535 goto out; 536 537 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 538 pathp[1].offset = offset; 539 pathp[1].node = slot; 540 slot = slot->slots[offset]; 541 pathp++; 542 shift -= RADIX_TREE_MAP_SHIFT; 543 height--; 544 } 545 546 if (slot == NULL) 547 goto out; 548 549 while (pathp->node) { 550 if (!tag_get(pathp->node, tag, pathp->offset)) 551 goto out; 552 tag_clear(pathp->node, tag, pathp->offset); 553 if (any_tag_set(pathp->node, tag)) 554 goto out; 555 pathp--; 556 } 557 558 /* clear the root's tag bit */ 559 if (root_tag_get(root, tag)) 560 root_tag_clear(root, tag); 561 562 out: 563 return slot; 564 } 565 EXPORT_SYMBOL(radix_tree_tag_clear); 566 567 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 568 /** 569 * radix_tree_tag_get - get a tag on a radix tree node 570 * @root: radix tree root 571 * @index: index key 572 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 573 * 574 * Return values: 575 * 576 * 0: tag not present or not set 577 * 1: tag set 578 */ 579 int radix_tree_tag_get(struct radix_tree_root *root, 580 unsigned long index, unsigned int tag) 581 { 582 unsigned int height, shift; 583 struct radix_tree_node *node; 584 int saw_unset_tag = 0; 585 586 /* check the root's tag bit */ 587 if (!root_tag_get(root, tag)) 588 return 0; 589 590 node = rcu_dereference(root->rnode); 591 if (node == NULL) 592 return 0; 593 594 if (!radix_tree_is_indirect_ptr(node)) 595 return (index == 0); 596 node = radix_tree_indirect_to_ptr(node); 597 598 height = node->height; 599 if (index > radix_tree_maxindex(height)) 600 return 0; 601 602 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 603 604 for ( ; ; ) { 605 int offset; 606 607 if (node == NULL) 608 return 0; 609 610 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 611 612 /* 613 * This is just a debug check. Later, we can bale as soon as 614 * we see an unset tag. 615 */ 616 if (!tag_get(node, tag, offset)) 617 saw_unset_tag = 1; 618 if (height == 1) { 619 int ret = tag_get(node, tag, offset); 620 621 BUG_ON(ret && saw_unset_tag); 622 return !!ret; 623 } 624 node = rcu_dereference(node->slots[offset]); 625 shift -= RADIX_TREE_MAP_SHIFT; 626 height--; 627 } 628 } 629 EXPORT_SYMBOL(radix_tree_tag_get); 630 #endif 631 632 /** 633 * radix_tree_next_hole - find the next hole (not-present entry) 634 * @root: tree root 635 * @index: index key 636 * @max_scan: maximum range to search 637 * 638 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest 639 * indexed hole. 640 * 641 * Returns: the index of the hole if found, otherwise returns an index 642 * outside of the set specified (in which case 'return - index >= max_scan' 643 * will be true). In rare cases of index wrap-around, 0 will be returned. 644 * 645 * radix_tree_next_hole may be called under rcu_read_lock. However, like 646 * radix_tree_gang_lookup, this will not atomically search a snapshot of 647 * the tree at a single point in time. For example, if a hole is created 648 * at index 5, then subsequently a hole is created at index 10, 649 * radix_tree_next_hole covering both indexes may return 10 if called 650 * under rcu_read_lock. 651 */ 652 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 653 unsigned long index, unsigned long max_scan) 654 { 655 unsigned long i; 656 657 for (i = 0; i < max_scan; i++) { 658 if (!radix_tree_lookup(root, index)) 659 break; 660 index++; 661 if (index == 0) 662 break; 663 } 664 665 return index; 666 } 667 EXPORT_SYMBOL(radix_tree_next_hole); 668 669 static unsigned int 670 __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, 671 unsigned int max_items, unsigned long *next_index) 672 { 673 unsigned int nr_found = 0; 674 unsigned int shift, height; 675 unsigned long i; 676 677 height = slot->height; 678 if (height == 0) 679 goto out; 680 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 681 682 for ( ; height > 1; height--) { 683 i = (index >> shift) & RADIX_TREE_MAP_MASK; 684 for (;;) { 685 if (slot->slots[i] != NULL) 686 break; 687 index &= ~((1UL << shift) - 1); 688 index += 1UL << shift; 689 if (index == 0) 690 goto out; /* 32-bit wraparound */ 691 i++; 692 if (i == RADIX_TREE_MAP_SIZE) 693 goto out; 694 } 695 696 shift -= RADIX_TREE_MAP_SHIFT; 697 slot = rcu_dereference(slot->slots[i]); 698 if (slot == NULL) 699 goto out; 700 } 701 702 /* Bottom level: grab some items */ 703 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 704 index++; 705 if (slot->slots[i]) { 706 results[nr_found++] = &(slot->slots[i]); 707 if (nr_found == max_items) 708 goto out; 709 } 710 } 711 out: 712 *next_index = index; 713 return nr_found; 714 } 715 716 /** 717 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 718 * @root: radix tree root 719 * @results: where the results of the lookup are placed 720 * @first_index: start the lookup from this key 721 * @max_items: place up to this many items at *results 722 * 723 * Performs an index-ascending scan of the tree for present items. Places 724 * them at *@results and returns the number of items which were placed at 725 * *@results. 726 * 727 * The implementation is naive. 728 * 729 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 730 * rcu_read_lock. In this case, rather than the returned results being 731 * an atomic snapshot of the tree at a single point in time, the semantics 732 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 733 * have been issued in individual locks, and results stored in 'results'. 734 */ 735 unsigned int 736 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 737 unsigned long first_index, unsigned int max_items) 738 { 739 unsigned long max_index; 740 struct radix_tree_node *node; 741 unsigned long cur_index = first_index; 742 unsigned int ret; 743 744 node = rcu_dereference(root->rnode); 745 if (!node) 746 return 0; 747 748 if (!radix_tree_is_indirect_ptr(node)) { 749 if (first_index > 0) 750 return 0; 751 results[0] = node; 752 return 1; 753 } 754 node = radix_tree_indirect_to_ptr(node); 755 756 max_index = radix_tree_maxindex(node->height); 757 758 ret = 0; 759 while (ret < max_items) { 760 unsigned int nr_found, slots_found, i; 761 unsigned long next_index; /* Index of next search */ 762 763 if (cur_index > max_index) 764 break; 765 slots_found = __lookup(node, (void ***)results + ret, cur_index, 766 max_items - ret, &next_index); 767 nr_found = 0; 768 for (i = 0; i < slots_found; i++) { 769 struct radix_tree_node *slot; 770 slot = *(((void ***)results)[ret + i]); 771 if (!slot) 772 continue; 773 results[ret + nr_found] = rcu_dereference(slot); 774 nr_found++; 775 } 776 ret += nr_found; 777 if (next_index == 0) 778 break; 779 cur_index = next_index; 780 } 781 782 return ret; 783 } 784 EXPORT_SYMBOL(radix_tree_gang_lookup); 785 786 /** 787 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree 788 * @root: radix tree root 789 * @results: where the results of the lookup are placed 790 * @first_index: start the lookup from this key 791 * @max_items: place up to this many items at *results 792 * 793 * Performs an index-ascending scan of the tree for present items. Places 794 * their slots at *@results and returns the number of items which were 795 * placed at *@results. 796 * 797 * The implementation is naive. 798 * 799 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must 800 * be dereferenced with radix_tree_deref_slot, and if using only RCU 801 * protection, radix_tree_deref_slot may fail requiring a retry. 802 */ 803 unsigned int 804 radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 805 unsigned long first_index, unsigned int max_items) 806 { 807 unsigned long max_index; 808 struct radix_tree_node *node; 809 unsigned long cur_index = first_index; 810 unsigned int ret; 811 812 node = rcu_dereference(root->rnode); 813 if (!node) 814 return 0; 815 816 if (!radix_tree_is_indirect_ptr(node)) { 817 if (first_index > 0) 818 return 0; 819 results[0] = (void **)&root->rnode; 820 return 1; 821 } 822 node = radix_tree_indirect_to_ptr(node); 823 824 max_index = radix_tree_maxindex(node->height); 825 826 ret = 0; 827 while (ret < max_items) { 828 unsigned int slots_found; 829 unsigned long next_index; /* Index of next search */ 830 831 if (cur_index > max_index) 832 break; 833 slots_found = __lookup(node, results + ret, cur_index, 834 max_items - ret, &next_index); 835 ret += slots_found; 836 if (next_index == 0) 837 break; 838 cur_index = next_index; 839 } 840 841 return ret; 842 } 843 EXPORT_SYMBOL(radix_tree_gang_lookup_slot); 844 845 /* 846 * FIXME: the two tag_get()s here should use find_next_bit() instead of 847 * open-coding the search. 848 */ 849 static unsigned int 850 __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, 851 unsigned int max_items, unsigned long *next_index, unsigned int tag) 852 { 853 unsigned int nr_found = 0; 854 unsigned int shift, height; 855 856 height = slot->height; 857 if (height == 0) 858 goto out; 859 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 860 861 while (height > 0) { 862 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 863 864 for (;;) { 865 if (tag_get(slot, tag, i)) 866 break; 867 index &= ~((1UL << shift) - 1); 868 index += 1UL << shift; 869 if (index == 0) 870 goto out; /* 32-bit wraparound */ 871 i++; 872 if (i == RADIX_TREE_MAP_SIZE) 873 goto out; 874 } 875 height--; 876 if (height == 0) { /* Bottom level: grab some items */ 877 unsigned long j = index & RADIX_TREE_MAP_MASK; 878 879 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 880 index++; 881 if (!tag_get(slot, tag, j)) 882 continue; 883 /* 884 * Even though the tag was found set, we need to 885 * recheck that we have a non-NULL node, because 886 * if this lookup is lockless, it may have been 887 * subsequently deleted. 888 * 889 * Similar care must be taken in any place that 890 * lookup ->slots[x] without a lock (ie. can't 891 * rely on its value remaining the same). 892 */ 893 if (slot->slots[j]) { 894 results[nr_found++] = &(slot->slots[j]); 895 if (nr_found == max_items) 896 goto out; 897 } 898 } 899 } 900 shift -= RADIX_TREE_MAP_SHIFT; 901 slot = rcu_dereference(slot->slots[i]); 902 if (slot == NULL) 903 break; 904 } 905 out: 906 *next_index = index; 907 return nr_found; 908 } 909 910 /** 911 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 912 * based on a tag 913 * @root: radix tree root 914 * @results: where the results of the lookup are placed 915 * @first_index: start the lookup from this key 916 * @max_items: place up to this many items at *results 917 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 918 * 919 * Performs an index-ascending scan of the tree for present items which 920 * have the tag indexed by @tag set. Places the items at *@results and 921 * returns the number of items which were placed at *@results. 922 */ 923 unsigned int 924 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 925 unsigned long first_index, unsigned int max_items, 926 unsigned int tag) 927 { 928 struct radix_tree_node *node; 929 unsigned long max_index; 930 unsigned long cur_index = first_index; 931 unsigned int ret; 932 933 /* check the root's tag bit */ 934 if (!root_tag_get(root, tag)) 935 return 0; 936 937 node = rcu_dereference(root->rnode); 938 if (!node) 939 return 0; 940 941 if (!radix_tree_is_indirect_ptr(node)) { 942 if (first_index > 0) 943 return 0; 944 results[0] = node; 945 return 1; 946 } 947 node = radix_tree_indirect_to_ptr(node); 948 949 max_index = radix_tree_maxindex(node->height); 950 951 ret = 0; 952 while (ret < max_items) { 953 unsigned int nr_found, slots_found, i; 954 unsigned long next_index; /* Index of next search */ 955 956 if (cur_index > max_index) 957 break; 958 slots_found = __lookup_tag(node, (void ***)results + ret, 959 cur_index, max_items - ret, &next_index, tag); 960 nr_found = 0; 961 for (i = 0; i < slots_found; i++) { 962 struct radix_tree_node *slot; 963 slot = *(((void ***)results)[ret + i]); 964 if (!slot) 965 continue; 966 results[ret + nr_found] = rcu_dereference(slot); 967 nr_found++; 968 } 969 ret += nr_found; 970 if (next_index == 0) 971 break; 972 cur_index = next_index; 973 } 974 975 return ret; 976 } 977 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 978 979 /** 980 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 981 * radix tree based on a tag 982 * @root: radix tree root 983 * @results: where the results of the lookup are placed 984 * @first_index: start the lookup from this key 985 * @max_items: place up to this many items at *results 986 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 987 * 988 * Performs an index-ascending scan of the tree for present items which 989 * have the tag indexed by @tag set. Places the slots at *@results and 990 * returns the number of slots which were placed at *@results. 991 */ 992 unsigned int 993 radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, 994 unsigned long first_index, unsigned int max_items, 995 unsigned int tag) 996 { 997 struct radix_tree_node *node; 998 unsigned long max_index; 999 unsigned long cur_index = first_index; 1000 unsigned int ret; 1001 1002 /* check the root's tag bit */ 1003 if (!root_tag_get(root, tag)) 1004 return 0; 1005 1006 node = rcu_dereference(root->rnode); 1007 if (!node) 1008 return 0; 1009 1010 if (!radix_tree_is_indirect_ptr(node)) { 1011 if (first_index > 0) 1012 return 0; 1013 results[0] = (void **)&root->rnode; 1014 return 1; 1015 } 1016 node = radix_tree_indirect_to_ptr(node); 1017 1018 max_index = radix_tree_maxindex(node->height); 1019 1020 ret = 0; 1021 while (ret < max_items) { 1022 unsigned int slots_found; 1023 unsigned long next_index; /* Index of next search */ 1024 1025 if (cur_index > max_index) 1026 break; 1027 slots_found = __lookup_tag(node, results + ret, 1028 cur_index, max_items - ret, &next_index, tag); 1029 ret += slots_found; 1030 if (next_index == 0) 1031 break; 1032 cur_index = next_index; 1033 } 1034 1035 return ret; 1036 } 1037 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1038 1039 1040 /** 1041 * radix_tree_shrink - shrink height of a radix tree to minimal 1042 * @root radix tree root 1043 */ 1044 static inline void radix_tree_shrink(struct radix_tree_root *root) 1045 { 1046 /* try to shrink tree height */ 1047 while (root->height > 0) { 1048 struct radix_tree_node *to_free = root->rnode; 1049 void *newptr; 1050 1051 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1052 to_free = radix_tree_indirect_to_ptr(to_free); 1053 1054 /* 1055 * The candidate node has more than one child, or its child 1056 * is not at the leftmost slot, we cannot shrink. 1057 */ 1058 if (to_free->count != 1) 1059 break; 1060 if (!to_free->slots[0]) 1061 break; 1062 1063 /* 1064 * We don't need rcu_assign_pointer(), since we are simply 1065 * moving the node from one part of the tree to another. If 1066 * it was safe to dereference the old pointer to it 1067 * (to_free->slots[0]), it will be safe to dereference the new 1068 * one (root->rnode). 1069 */ 1070 newptr = to_free->slots[0]; 1071 if (root->height > 1) 1072 newptr = radix_tree_ptr_to_indirect(newptr); 1073 root->rnode = newptr; 1074 root->height--; 1075 radix_tree_node_free(to_free); 1076 } 1077 } 1078 1079 /** 1080 * radix_tree_delete - delete an item from a radix tree 1081 * @root: radix tree root 1082 * @index: index key 1083 * 1084 * Remove the item at @index from the radix tree rooted at @root. 1085 * 1086 * Returns the address of the deleted item, or NULL if it was not present. 1087 */ 1088 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1089 { 1090 /* 1091 * The radix tree path needs to be one longer than the maximum path 1092 * since the "list" is null terminated. 1093 */ 1094 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 1095 struct radix_tree_node *slot = NULL; 1096 struct radix_tree_node *to_free; 1097 unsigned int height, shift; 1098 int tag; 1099 int offset; 1100 1101 height = root->height; 1102 if (index > radix_tree_maxindex(height)) 1103 goto out; 1104 1105 slot = root->rnode; 1106 if (height == 0) { 1107 root_tag_clear_all(root); 1108 root->rnode = NULL; 1109 goto out; 1110 } 1111 slot = radix_tree_indirect_to_ptr(slot); 1112 1113 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1114 pathp->node = NULL; 1115 1116 do { 1117 if (slot == NULL) 1118 goto out; 1119 1120 pathp++; 1121 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 1122 pathp->offset = offset; 1123 pathp->node = slot; 1124 slot = slot->slots[offset]; 1125 shift -= RADIX_TREE_MAP_SHIFT; 1126 height--; 1127 } while (height > 0); 1128 1129 if (slot == NULL) 1130 goto out; 1131 1132 /* 1133 * Clear all tags associated with the just-deleted item 1134 */ 1135 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 1136 if (tag_get(pathp->node, tag, pathp->offset)) 1137 radix_tree_tag_clear(root, index, tag); 1138 } 1139 1140 to_free = NULL; 1141 /* Now free the nodes we do not need anymore */ 1142 while (pathp->node) { 1143 pathp->node->slots[pathp->offset] = NULL; 1144 pathp->node->count--; 1145 /* 1146 * Queue the node for deferred freeing after the 1147 * last reference to it disappears (set NULL, above). 1148 */ 1149 if (to_free) 1150 radix_tree_node_free(to_free); 1151 1152 if (pathp->node->count) { 1153 if (pathp->node == 1154 radix_tree_indirect_to_ptr(root->rnode)) 1155 radix_tree_shrink(root); 1156 goto out; 1157 } 1158 1159 /* Node with zero slots in use so free it */ 1160 to_free = pathp->node; 1161 pathp--; 1162 1163 } 1164 root_tag_clear_all(root); 1165 root->height = 0; 1166 root->rnode = NULL; 1167 if (to_free) 1168 radix_tree_node_free(to_free); 1169 1170 out: 1171 return slot; 1172 } 1173 EXPORT_SYMBOL(radix_tree_delete); 1174 1175 /** 1176 * radix_tree_tagged - test whether any items in the tree are tagged 1177 * @root: radix tree root 1178 * @tag: tag to test 1179 */ 1180 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 1181 { 1182 return root_tag_get(root, tag); 1183 } 1184 EXPORT_SYMBOL(radix_tree_tagged); 1185 1186 static void 1187 radix_tree_node_ctor(void *node) 1188 { 1189 memset(node, 0, sizeof(struct radix_tree_node)); 1190 } 1191 1192 static __init unsigned long __maxindex(unsigned int height) 1193 { 1194 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 1195 int shift = RADIX_TREE_INDEX_BITS - width; 1196 1197 if (shift < 0) 1198 return ~0UL; 1199 if (shift >= BITS_PER_LONG) 1200 return 0UL; 1201 return ~0UL >> shift; 1202 } 1203 1204 static __init void radix_tree_init_maxindex(void) 1205 { 1206 unsigned int i; 1207 1208 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 1209 height_to_maxindex[i] = __maxindex(i); 1210 } 1211 1212 static int radix_tree_callback(struct notifier_block *nfb, 1213 unsigned long action, 1214 void *hcpu) 1215 { 1216 int cpu = (long)hcpu; 1217 struct radix_tree_preload *rtp; 1218 1219 /* Free per-cpu pool of perloaded nodes */ 1220 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1221 rtp = &per_cpu(radix_tree_preloads, cpu); 1222 while (rtp->nr) { 1223 kmem_cache_free(radix_tree_node_cachep, 1224 rtp->nodes[rtp->nr-1]); 1225 rtp->nodes[rtp->nr-1] = NULL; 1226 rtp->nr--; 1227 } 1228 } 1229 return NOTIFY_OK; 1230 } 1231 1232 void __init radix_tree_init(void) 1233 { 1234 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1235 sizeof(struct radix_tree_node), 0, 1236 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1237 radix_tree_node_ctor); 1238 radix_tree_init_maxindex(); 1239 hotcpu_notifier(radix_tree_callback, 0); 1240 } 1241