1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) 64 65 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; 66 67 /* 68 * Radix tree node cache. 69 */ 70 static struct kmem_cache *radix_tree_node_cachep; 71 72 /* 73 * Per-cpu pool of preloaded nodes 74 */ 75 struct radix_tree_preload { 76 int nr; 77 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 78 }; 79 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 80 81 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 82 { 83 return root->gfp_mask & __GFP_BITS_MASK; 84 } 85 86 /* 87 * This assumes that the caller has performed appropriate preallocation, and 88 * that the caller has pinned this thread of control to the current CPU. 89 */ 90 static struct radix_tree_node * 91 radix_tree_node_alloc(struct radix_tree_root *root) 92 { 93 struct radix_tree_node *ret; 94 gfp_t gfp_mask = root_gfp_mask(root); 95 96 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 97 if (ret == NULL && !(gfp_mask & __GFP_WAIT)) { 98 struct radix_tree_preload *rtp; 99 100 rtp = &__get_cpu_var(radix_tree_preloads); 101 if (rtp->nr) { 102 ret = rtp->nodes[rtp->nr - 1]; 103 rtp->nodes[rtp->nr - 1] = NULL; 104 rtp->nr--; 105 } 106 } 107 BUG_ON(radix_tree_is_direct_ptr(ret)); 108 return ret; 109 } 110 111 static void radix_tree_node_rcu_free(struct rcu_head *head) 112 { 113 struct radix_tree_node *node = 114 container_of(head, struct radix_tree_node, rcu_head); 115 kmem_cache_free(radix_tree_node_cachep, node); 116 } 117 118 static inline void 119 radix_tree_node_free(struct radix_tree_node *node) 120 { 121 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 122 } 123 124 /* 125 * Load up this CPU's radix_tree_node buffer with sufficient objects to 126 * ensure that the addition of a single element in the tree cannot fail. On 127 * success, return zero, with preemption disabled. On error, return -ENOMEM 128 * with preemption not disabled. 129 */ 130 int radix_tree_preload(gfp_t gfp_mask) 131 { 132 struct radix_tree_preload *rtp; 133 struct radix_tree_node *node; 134 int ret = -ENOMEM; 135 136 preempt_disable(); 137 rtp = &__get_cpu_var(radix_tree_preloads); 138 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 139 preempt_enable(); 140 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 141 if (node == NULL) 142 goto out; 143 preempt_disable(); 144 rtp = &__get_cpu_var(radix_tree_preloads); 145 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 146 rtp->nodes[rtp->nr++] = node; 147 else 148 kmem_cache_free(radix_tree_node_cachep, node); 149 } 150 ret = 0; 151 out: 152 return ret; 153 } 154 155 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 156 int offset) 157 { 158 __set_bit(offset, node->tags[tag]); 159 } 160 161 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 162 int offset) 163 { 164 __clear_bit(offset, node->tags[tag]); 165 } 166 167 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 168 int offset) 169 { 170 return test_bit(offset, node->tags[tag]); 171 } 172 173 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 174 { 175 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 176 } 177 178 179 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 180 { 181 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 182 } 183 184 static inline void root_tag_clear_all(struct radix_tree_root *root) 185 { 186 root->gfp_mask &= __GFP_BITS_MASK; 187 } 188 189 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 190 { 191 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 192 } 193 194 /* 195 * Returns 1 if any slot in the node has this tag set. 196 * Otherwise returns 0. 197 */ 198 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 199 { 200 int idx; 201 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 202 if (node->tags[tag][idx]) 203 return 1; 204 } 205 return 0; 206 } 207 208 /* 209 * Return the maximum key which can be store into a 210 * radix tree with height HEIGHT. 211 */ 212 static inline unsigned long radix_tree_maxindex(unsigned int height) 213 { 214 return height_to_maxindex[height]; 215 } 216 217 /* 218 * Extend a radix tree so it can store key @index. 219 */ 220 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 221 { 222 struct radix_tree_node *node; 223 unsigned int height; 224 int tag; 225 226 /* Figure out what the height should be. */ 227 height = root->height + 1; 228 while (index > radix_tree_maxindex(height)) 229 height++; 230 231 if (root->rnode == NULL) { 232 root->height = height; 233 goto out; 234 } 235 236 do { 237 unsigned int newheight; 238 if (!(node = radix_tree_node_alloc(root))) 239 return -ENOMEM; 240 241 /* Increase the height. */ 242 node->slots[0] = radix_tree_direct_to_ptr(root->rnode); 243 244 /* Propagate the aggregated tag info into the new root */ 245 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 246 if (root_tag_get(root, tag)) 247 tag_set(node, tag, 0); 248 } 249 250 newheight = root->height+1; 251 node->height = newheight; 252 node->count = 1; 253 rcu_assign_pointer(root->rnode, node); 254 root->height = newheight; 255 } while (height > root->height); 256 out: 257 return 0; 258 } 259 260 /** 261 * radix_tree_insert - insert into a radix tree 262 * @root: radix tree root 263 * @index: index key 264 * @item: item to insert 265 * 266 * Insert an item into the radix tree at position @index. 267 */ 268 int radix_tree_insert(struct radix_tree_root *root, 269 unsigned long index, void *item) 270 { 271 struct radix_tree_node *node = NULL, *slot; 272 unsigned int height, shift; 273 int offset; 274 int error; 275 276 BUG_ON(radix_tree_is_direct_ptr(item)); 277 278 /* Make sure the tree is high enough. */ 279 if (index > radix_tree_maxindex(root->height)) { 280 error = radix_tree_extend(root, index); 281 if (error) 282 return error; 283 } 284 285 slot = root->rnode; 286 height = root->height; 287 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 288 289 offset = 0; /* uninitialised var warning */ 290 while (height > 0) { 291 if (slot == NULL) { 292 /* Have to add a child node. */ 293 if (!(slot = radix_tree_node_alloc(root))) 294 return -ENOMEM; 295 slot->height = height; 296 if (node) { 297 rcu_assign_pointer(node->slots[offset], slot); 298 node->count++; 299 } else 300 rcu_assign_pointer(root->rnode, slot); 301 } 302 303 /* Go a level down */ 304 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 305 node = slot; 306 slot = node->slots[offset]; 307 shift -= RADIX_TREE_MAP_SHIFT; 308 height--; 309 } 310 311 if (slot != NULL) 312 return -EEXIST; 313 314 if (node) { 315 node->count++; 316 rcu_assign_pointer(node->slots[offset], item); 317 BUG_ON(tag_get(node, 0, offset)); 318 BUG_ON(tag_get(node, 1, offset)); 319 } else { 320 rcu_assign_pointer(root->rnode, radix_tree_ptr_to_direct(item)); 321 BUG_ON(root_tag_get(root, 0)); 322 BUG_ON(root_tag_get(root, 1)); 323 } 324 325 return 0; 326 } 327 EXPORT_SYMBOL(radix_tree_insert); 328 329 /** 330 * radix_tree_lookup_slot - lookup a slot in a radix tree 331 * @root: radix tree root 332 * @index: index key 333 * 334 * Returns: the slot corresponding to the position @index in the 335 * radix tree @root. This is useful for update-if-exists operations. 336 * 337 * This function cannot be called under rcu_read_lock, it must be 338 * excluded from writers, as must the returned slot for subsequent 339 * use by radix_tree_deref_slot() and radix_tree_replace slot. 340 * Caller must hold tree write locked across slot lookup and 341 * replace. 342 */ 343 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 344 { 345 unsigned int height, shift; 346 struct radix_tree_node *node, **slot; 347 348 node = root->rnode; 349 if (node == NULL) 350 return NULL; 351 352 if (radix_tree_is_direct_ptr(node)) { 353 if (index > 0) 354 return NULL; 355 return (void **)&root->rnode; 356 } 357 358 height = node->height; 359 if (index > radix_tree_maxindex(height)) 360 return NULL; 361 362 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 363 364 do { 365 slot = (struct radix_tree_node **) 366 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 367 node = *slot; 368 if (node == NULL) 369 return NULL; 370 371 shift -= RADIX_TREE_MAP_SHIFT; 372 height--; 373 } while (height > 0); 374 375 return (void **)slot; 376 } 377 EXPORT_SYMBOL(radix_tree_lookup_slot); 378 379 /** 380 * radix_tree_lookup - perform lookup operation on a radix tree 381 * @root: radix tree root 382 * @index: index key 383 * 384 * Lookup the item at the position @index in the radix tree @root. 385 * 386 * This function can be called under rcu_read_lock, however the caller 387 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 388 * them safely). No RCU barriers are required to access or modify the 389 * returned item, however. 390 */ 391 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 392 { 393 unsigned int height, shift; 394 struct radix_tree_node *node, **slot; 395 396 node = rcu_dereference(root->rnode); 397 if (node == NULL) 398 return NULL; 399 400 if (radix_tree_is_direct_ptr(node)) { 401 if (index > 0) 402 return NULL; 403 return radix_tree_direct_to_ptr(node); 404 } 405 406 height = node->height; 407 if (index > radix_tree_maxindex(height)) 408 return NULL; 409 410 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 411 412 do { 413 slot = (struct radix_tree_node **) 414 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 415 node = rcu_dereference(*slot); 416 if (node == NULL) 417 return NULL; 418 419 shift -= RADIX_TREE_MAP_SHIFT; 420 height--; 421 } while (height > 0); 422 423 return node; 424 } 425 EXPORT_SYMBOL(radix_tree_lookup); 426 427 /** 428 * radix_tree_tag_set - set a tag on a radix tree node 429 * @root: radix tree root 430 * @index: index key 431 * @tag: tag index 432 * 433 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 434 * corresponding to @index in the radix tree. From 435 * the root all the way down to the leaf node. 436 * 437 * Returns the address of the tagged item. Setting a tag on a not-present 438 * item is a bug. 439 */ 440 void *radix_tree_tag_set(struct radix_tree_root *root, 441 unsigned long index, unsigned int tag) 442 { 443 unsigned int height, shift; 444 struct radix_tree_node *slot; 445 446 height = root->height; 447 BUG_ON(index > radix_tree_maxindex(height)); 448 449 slot = root->rnode; 450 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 451 452 while (height > 0) { 453 int offset; 454 455 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 456 if (!tag_get(slot, tag, offset)) 457 tag_set(slot, tag, offset); 458 slot = slot->slots[offset]; 459 BUG_ON(slot == NULL); 460 shift -= RADIX_TREE_MAP_SHIFT; 461 height--; 462 } 463 464 /* set the root's tag bit */ 465 if (slot && !root_tag_get(root, tag)) 466 root_tag_set(root, tag); 467 468 return slot; 469 } 470 EXPORT_SYMBOL(radix_tree_tag_set); 471 472 /** 473 * radix_tree_tag_clear - clear a tag on a radix tree node 474 * @root: radix tree root 475 * @index: index key 476 * @tag: tag index 477 * 478 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 479 * corresponding to @index in the radix tree. If 480 * this causes the leaf node to have no tags set then clear the tag in the 481 * next-to-leaf node, etc. 482 * 483 * Returns the address of the tagged item on success, else NULL. ie: 484 * has the same return value and semantics as radix_tree_lookup(). 485 */ 486 void *radix_tree_tag_clear(struct radix_tree_root *root, 487 unsigned long index, unsigned int tag) 488 { 489 struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; 490 struct radix_tree_node *slot = NULL; 491 unsigned int height, shift; 492 493 height = root->height; 494 if (index > radix_tree_maxindex(height)) 495 goto out; 496 497 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 498 pathp->node = NULL; 499 slot = root->rnode; 500 501 while (height > 0) { 502 int offset; 503 504 if (slot == NULL) 505 goto out; 506 507 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 508 pathp[1].offset = offset; 509 pathp[1].node = slot; 510 slot = slot->slots[offset]; 511 pathp++; 512 shift -= RADIX_TREE_MAP_SHIFT; 513 height--; 514 } 515 516 if (slot == NULL) 517 goto out; 518 519 while (pathp->node) { 520 if (!tag_get(pathp->node, tag, pathp->offset)) 521 goto out; 522 tag_clear(pathp->node, tag, pathp->offset); 523 if (any_tag_set(pathp->node, tag)) 524 goto out; 525 pathp--; 526 } 527 528 /* clear the root's tag bit */ 529 if (root_tag_get(root, tag)) 530 root_tag_clear(root, tag); 531 532 out: 533 return slot; 534 } 535 EXPORT_SYMBOL(radix_tree_tag_clear); 536 537 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 538 /** 539 * radix_tree_tag_get - get a tag on a radix tree node 540 * @root: radix tree root 541 * @index: index key 542 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 543 * 544 * Return values: 545 * 546 * 0: tag not present or not set 547 * 1: tag set 548 */ 549 int radix_tree_tag_get(struct radix_tree_root *root, 550 unsigned long index, unsigned int tag) 551 { 552 unsigned int height, shift; 553 struct radix_tree_node *node; 554 int saw_unset_tag = 0; 555 556 /* check the root's tag bit */ 557 if (!root_tag_get(root, tag)) 558 return 0; 559 560 node = rcu_dereference(root->rnode); 561 if (node == NULL) 562 return 0; 563 564 if (radix_tree_is_direct_ptr(node)) 565 return (index == 0); 566 567 height = node->height; 568 if (index > radix_tree_maxindex(height)) 569 return 0; 570 571 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 572 573 for ( ; ; ) { 574 int offset; 575 576 if (node == NULL) 577 return 0; 578 579 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 580 581 /* 582 * This is just a debug check. Later, we can bale as soon as 583 * we see an unset tag. 584 */ 585 if (!tag_get(node, tag, offset)) 586 saw_unset_tag = 1; 587 if (height == 1) { 588 int ret = tag_get(node, tag, offset); 589 590 BUG_ON(ret && saw_unset_tag); 591 return !!ret; 592 } 593 node = rcu_dereference(node->slots[offset]); 594 shift -= RADIX_TREE_MAP_SHIFT; 595 height--; 596 } 597 } 598 EXPORT_SYMBOL(radix_tree_tag_get); 599 #endif 600 601 static unsigned int 602 __lookup(struct radix_tree_node *slot, void **results, unsigned long index, 603 unsigned int max_items, unsigned long *next_index) 604 { 605 unsigned int nr_found = 0; 606 unsigned int shift, height; 607 unsigned long i; 608 609 height = slot->height; 610 if (height == 0) 611 goto out; 612 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 613 614 for ( ; height > 1; height--) { 615 i = (index >> shift) & RADIX_TREE_MAP_MASK; 616 for (;;) { 617 if (slot->slots[i] != NULL) 618 break; 619 index &= ~((1UL << shift) - 1); 620 index += 1UL << shift; 621 if (index == 0) 622 goto out; /* 32-bit wraparound */ 623 i++; 624 if (i == RADIX_TREE_MAP_SIZE) 625 goto out; 626 } 627 628 shift -= RADIX_TREE_MAP_SHIFT; 629 slot = rcu_dereference(slot->slots[i]); 630 if (slot == NULL) 631 goto out; 632 } 633 634 /* Bottom level: grab some items */ 635 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 636 struct radix_tree_node *node; 637 index++; 638 node = slot->slots[i]; 639 if (node) { 640 results[nr_found++] = rcu_dereference(node); 641 if (nr_found == max_items) 642 goto out; 643 } 644 } 645 out: 646 *next_index = index; 647 return nr_found; 648 } 649 650 /** 651 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 652 * @root: radix tree root 653 * @results: where the results of the lookup are placed 654 * @first_index: start the lookup from this key 655 * @max_items: place up to this many items at *results 656 * 657 * Performs an index-ascending scan of the tree for present items. Places 658 * them at *@results and returns the number of items which were placed at 659 * *@results. 660 * 661 * The implementation is naive. 662 * 663 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 664 * rcu_read_lock. In this case, rather than the returned results being 665 * an atomic snapshot of the tree at a single point in time, the semantics 666 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 667 * have been issued in individual locks, and results stored in 'results'. 668 */ 669 unsigned int 670 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 671 unsigned long first_index, unsigned int max_items) 672 { 673 unsigned long max_index; 674 struct radix_tree_node *node; 675 unsigned long cur_index = first_index; 676 unsigned int ret; 677 678 node = rcu_dereference(root->rnode); 679 if (!node) 680 return 0; 681 682 if (radix_tree_is_direct_ptr(node)) { 683 if (first_index > 0) 684 return 0; 685 node = radix_tree_direct_to_ptr(node); 686 results[0] = rcu_dereference(node); 687 return 1; 688 } 689 690 max_index = radix_tree_maxindex(node->height); 691 692 ret = 0; 693 while (ret < max_items) { 694 unsigned int nr_found; 695 unsigned long next_index; /* Index of next search */ 696 697 if (cur_index > max_index) 698 break; 699 nr_found = __lookup(node, results + ret, cur_index, 700 max_items - ret, &next_index); 701 ret += nr_found; 702 if (next_index == 0) 703 break; 704 cur_index = next_index; 705 } 706 707 return ret; 708 } 709 EXPORT_SYMBOL(radix_tree_gang_lookup); 710 711 /* 712 * FIXME: the two tag_get()s here should use find_next_bit() instead of 713 * open-coding the search. 714 */ 715 static unsigned int 716 __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, 717 unsigned int max_items, unsigned long *next_index, unsigned int tag) 718 { 719 unsigned int nr_found = 0; 720 unsigned int shift, height; 721 722 height = slot->height; 723 if (height == 0) 724 goto out; 725 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 726 727 while (height > 0) { 728 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 729 730 for (;;) { 731 if (tag_get(slot, tag, i)) 732 break; 733 index &= ~((1UL << shift) - 1); 734 index += 1UL << shift; 735 if (index == 0) 736 goto out; /* 32-bit wraparound */ 737 i++; 738 if (i == RADIX_TREE_MAP_SIZE) 739 goto out; 740 } 741 height--; 742 if (height == 0) { /* Bottom level: grab some items */ 743 unsigned long j = index & RADIX_TREE_MAP_MASK; 744 745 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 746 struct radix_tree_node *node; 747 index++; 748 if (!tag_get(slot, tag, j)) 749 continue; 750 node = slot->slots[j]; 751 /* 752 * Even though the tag was found set, we need to 753 * recheck that we have a non-NULL node, because 754 * if this lookup is lockless, it may have been 755 * subsequently deleted. 756 * 757 * Similar care must be taken in any place that 758 * lookup ->slots[x] without a lock (ie. can't 759 * rely on its value remaining the same). 760 */ 761 if (node) { 762 node = rcu_dereference(node); 763 results[nr_found++] = node; 764 if (nr_found == max_items) 765 goto out; 766 } 767 } 768 } 769 shift -= RADIX_TREE_MAP_SHIFT; 770 slot = rcu_dereference(slot->slots[i]); 771 if (slot == NULL) 772 break; 773 } 774 out: 775 *next_index = index; 776 return nr_found; 777 } 778 779 /** 780 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 781 * based on a tag 782 * @root: radix tree root 783 * @results: where the results of the lookup are placed 784 * @first_index: start the lookup from this key 785 * @max_items: place up to this many items at *results 786 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 787 * 788 * Performs an index-ascending scan of the tree for present items which 789 * have the tag indexed by @tag set. Places the items at *@results and 790 * returns the number of items which were placed at *@results. 791 */ 792 unsigned int 793 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 794 unsigned long first_index, unsigned int max_items, 795 unsigned int tag) 796 { 797 struct radix_tree_node *node; 798 unsigned long max_index; 799 unsigned long cur_index = first_index; 800 unsigned int ret; 801 802 /* check the root's tag bit */ 803 if (!root_tag_get(root, tag)) 804 return 0; 805 806 node = rcu_dereference(root->rnode); 807 if (!node) 808 return 0; 809 810 if (radix_tree_is_direct_ptr(node)) { 811 if (first_index > 0) 812 return 0; 813 node = radix_tree_direct_to_ptr(node); 814 results[0] = rcu_dereference(node); 815 return 1; 816 } 817 818 max_index = radix_tree_maxindex(node->height); 819 820 ret = 0; 821 while (ret < max_items) { 822 unsigned int nr_found; 823 unsigned long next_index; /* Index of next search */ 824 825 if (cur_index > max_index) 826 break; 827 nr_found = __lookup_tag(node, results + ret, cur_index, 828 max_items - ret, &next_index, tag); 829 ret += nr_found; 830 if (next_index == 0) 831 break; 832 cur_index = next_index; 833 } 834 835 return ret; 836 } 837 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 838 839 /** 840 * radix_tree_shrink - shrink height of a radix tree to minimal 841 * @root radix tree root 842 */ 843 static inline void radix_tree_shrink(struct radix_tree_root *root) 844 { 845 /* try to shrink tree height */ 846 while (root->height > 0 && 847 root->rnode->count == 1 && 848 root->rnode->slots[0]) { 849 struct radix_tree_node *to_free = root->rnode; 850 void *newptr; 851 852 /* 853 * We don't need rcu_assign_pointer(), since we are simply 854 * moving the node from one part of the tree to another. If 855 * it was safe to dereference the old pointer to it 856 * (to_free->slots[0]), it will be safe to dereference the new 857 * one (root->rnode). 858 */ 859 newptr = to_free->slots[0]; 860 if (root->height == 1) 861 newptr = radix_tree_ptr_to_direct(newptr); 862 root->rnode = newptr; 863 root->height--; 864 /* must only free zeroed nodes into the slab */ 865 tag_clear(to_free, 0, 0); 866 tag_clear(to_free, 1, 0); 867 to_free->slots[0] = NULL; 868 to_free->count = 0; 869 radix_tree_node_free(to_free); 870 } 871 } 872 873 /** 874 * radix_tree_delete - delete an item from a radix tree 875 * @root: radix tree root 876 * @index: index key 877 * 878 * Remove the item at @index from the radix tree rooted at @root. 879 * 880 * Returns the address of the deleted item, or NULL if it was not present. 881 */ 882 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 883 { 884 struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; 885 struct radix_tree_node *slot = NULL; 886 struct radix_tree_node *to_free; 887 unsigned int height, shift; 888 int tag; 889 int offset; 890 891 height = root->height; 892 if (index > radix_tree_maxindex(height)) 893 goto out; 894 895 slot = root->rnode; 896 if (height == 0 && root->rnode) { 897 slot = radix_tree_direct_to_ptr(slot); 898 root_tag_clear_all(root); 899 root->rnode = NULL; 900 goto out; 901 } 902 903 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 904 pathp->node = NULL; 905 906 do { 907 if (slot == NULL) 908 goto out; 909 910 pathp++; 911 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 912 pathp->offset = offset; 913 pathp->node = slot; 914 slot = slot->slots[offset]; 915 shift -= RADIX_TREE_MAP_SHIFT; 916 height--; 917 } while (height > 0); 918 919 if (slot == NULL) 920 goto out; 921 922 /* 923 * Clear all tags associated with the just-deleted item 924 */ 925 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 926 if (tag_get(pathp->node, tag, pathp->offset)) 927 radix_tree_tag_clear(root, index, tag); 928 } 929 930 to_free = NULL; 931 /* Now free the nodes we do not need anymore */ 932 while (pathp->node) { 933 pathp->node->slots[pathp->offset] = NULL; 934 pathp->node->count--; 935 /* 936 * Queue the node for deferred freeing after the 937 * last reference to it disappears (set NULL, above). 938 */ 939 if (to_free) 940 radix_tree_node_free(to_free); 941 942 if (pathp->node->count) { 943 if (pathp->node == root->rnode) 944 radix_tree_shrink(root); 945 goto out; 946 } 947 948 /* Node with zero slots in use so free it */ 949 to_free = pathp->node; 950 pathp--; 951 952 } 953 root_tag_clear_all(root); 954 root->height = 0; 955 root->rnode = NULL; 956 if (to_free) 957 radix_tree_node_free(to_free); 958 959 out: 960 return slot; 961 } 962 EXPORT_SYMBOL(radix_tree_delete); 963 964 /** 965 * radix_tree_tagged - test whether any items in the tree are tagged 966 * @root: radix tree root 967 * @tag: tag to test 968 */ 969 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 970 { 971 return root_tag_get(root, tag); 972 } 973 EXPORT_SYMBOL(radix_tree_tagged); 974 975 static void 976 radix_tree_node_ctor(void *node, struct kmem_cache *cachep, unsigned long flags) 977 { 978 memset(node, 0, sizeof(struct radix_tree_node)); 979 } 980 981 static __init unsigned long __maxindex(unsigned int height) 982 { 983 unsigned int tmp = height * RADIX_TREE_MAP_SHIFT; 984 unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1; 985 986 if (tmp >= RADIX_TREE_INDEX_BITS) 987 index = ~0UL; 988 return index; 989 } 990 991 static __init void radix_tree_init_maxindex(void) 992 { 993 unsigned int i; 994 995 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 996 height_to_maxindex[i] = __maxindex(i); 997 } 998 999 static int radix_tree_callback(struct notifier_block *nfb, 1000 unsigned long action, 1001 void *hcpu) 1002 { 1003 int cpu = (long)hcpu; 1004 struct radix_tree_preload *rtp; 1005 1006 /* Free per-cpu pool of perloaded nodes */ 1007 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1008 rtp = &per_cpu(radix_tree_preloads, cpu); 1009 while (rtp->nr) { 1010 kmem_cache_free(radix_tree_node_cachep, 1011 rtp->nodes[rtp->nr-1]); 1012 rtp->nodes[rtp->nr-1] = NULL; 1013 rtp->nr--; 1014 } 1015 } 1016 return NOTIFY_OK; 1017 } 1018 1019 void __init radix_tree_init(void) 1020 { 1021 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1022 sizeof(struct radix_tree_node), 0, 1023 SLAB_PANIC, radix_tree_node_ctor, NULL); 1024 radix_tree_init_maxindex(); 1025 hotcpu_notifier(radix_tree_callback, 0); 1026 } 1027