/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/rcupdate.h>
#include <linux/preempt.h>		/* in_interrupt() */


/*
 * The height_to_maxindex array needs to be one deeper than the maximum
 * path as height 0 holds only 1 entry.
 */
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;

/*
 * Radix tree node cache.
 */
static struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
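
/*
 * Worked example (illustrative, assuming a 64-bit machine with the
 * default RADIX_TREE_MAP_SHIFT of 6): RADIX_TREE_MAX_PATH is
 * DIV_ROUND_UP(64, 6) = 11, so up to 11 * 2 - 1 = 21 nodes are kept
 * preallocated per CPU.
 */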

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	int nr;
	/* nodes->private_data points to next preallocated node */
	struct radix_tree_node *nodes;
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

static inline void *ptr_to_indirect(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
}

static inline void *indirect_to_ptr(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
}
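
/*
 * For illustration: an indirect pointer differs from a plain entry only
 * in its bottom bit (RADIX_TREE_INDIRECT_PTR), so for any node pointer p
 * the round trip indirect_to_ptr(ptr_to_indirect(p)) == p, and the bit
 * can be tested cheaply with radix_tree_is_indirect_ptr().
 */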

static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= __GFP_BITS_MASK;
}

static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
	return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	int idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @addr: The address to base the search on
 * @size: The bitmap size in bits
 * @offset: The bit number to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next set bit offset, or size if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(const unsigned long *addr,
			 unsigned long size, unsigned long offset)
{
	if (!__builtin_constant_p(size))
		return find_next_bit(addr, size, offset);

	if (offset < size) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < size) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return size;
}
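
/*
 * Behaviour sketch: given a single-long bitmap with only bit 2 set,
 * radix_tree_find_next_bit(map, 64, 0) returns 2, while
 * radix_tree_find_next_bit(map, 64, 3) returns 64 (nothing found).
 */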

#if 0
static void dump_node(void *slot, int height, int offset)
{
	struct radix_tree_node *node;
	int i;

	if (!slot)
		return;

	if (height == 0) {
		pr_debug("radix entry %p offset %d\n", slot, offset);
		return;
	}

	node = indirect_to_ptr(slot);
	pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n",
		slot, offset, node->tags[0][0], node->tags[1][0],
		node->tags[2][0], node->path, node->count, node->parent);

	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
		dump_node(node->slots[i], height - 1, i);
}

/* For debug */
static void radix_tree_dump(struct radix_tree_root *root)
{
	pr_debug("radix root: %p height %d rnode %p tags %x\n",
			root, root->height, root->rnode,
			root->gfp_mask >> __GFP_BITS_SHIFT);
	if (!radix_tree_is_indirect_ptr(root->rnode))
		return;
	dump_node(root->rnode, root->height, 0);
}
#endif

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
	struct radix_tree_node *ret = NULL;
	gfp_t gfp_mask = root_gfp_mask(root);

	/*
	 * Preload code isn't irq safe and it doesn't make sense to use
	 * preloading in interrupt context anyway, as all the allocations
	 * have to be atomic.  So just do a normal allocation when in
	 * interrupt.
	 */
	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
		struct radix_tree_preload *rtp;

		/*
		 * Even if the caller has preloaded, try to allocate from the
		 * cache first for the new node to get accounted.
		 */
		ret = kmem_cache_alloc(radix_tree_node_cachep,
				       gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN);
		if (ret)
			goto out;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes;
			rtp->nodes = ret->private_data;
			ret->private_data = NULL;
			rtp->nr--;
		}
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(ret);
		goto out;
	}
	ret = kmem_cache_alloc(radix_tree_node_cachep,
			       gfp_mask | __GFP_ACCOUNT);
out:
	BUG_ON(radix_tree_is_indirect_ptr(ret));
	return ret;
}

static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);
	int i;

	/*
	 * Must only free zeroed nodes into the slab.  radix_tree_shrink
	 * can leave us with a non-NULL entry in the first slot, so clear
	 * that here to make sure.
	 */
	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
		tag_clear(node, i, 0);

	node->slots[0] = NULL;
	node->count = 0;

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static int __radix_tree_preload(gfp_t gfp_mask)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	preempt_disable();
	rtp = this_cpu_ptr(&radix_tree_preloads);
	while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
			node->private_data = rtp->nodes;
			rtp->nodes = node;
			rtp->nr++;
		} else {
			kmem_cache_free(radix_tree_node_cachep, node);
		}
	}
	ret = 0;
out:
	return ret;
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	/* Warn on non-sensical use... */
	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	return __radix_tree_preload(gfp_mask);
}
EXPORT_SYMBOL(radix_tree_preload);

/*
 * The same as radix_tree_preload(), except we don't guarantee that
 * preloading happens.  We do it if we decide it helps.  On success, return
 * zero with preemption disabled.  On error, return -ENOMEM with preemption
 * not disabled.
 */
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
	if (gfpflags_allow_blocking(gfp_mask))
		return __radix_tree_preload(gfp_mask);
	/* Preloading doesn't help anything with this gfp mask, skip it */
	preempt_disable();
	return 0;
}
EXPORT_SYMBOL(radix_tree_maybe_preload);
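
/*
 * Typical usage of the preload facility, as a sketch (my_lock, my_tree
 * and item are hypothetical caller state; radix_tree_preload_end() just
 * re-enables preemption):
 *
 *	if (radix_tree_preload(GFP_KERNEL))
 *		return -ENOMEM;
 *	spin_lock(&my_lock);
 *	error = radix_tree_insert(&my_tree, index, item);
 *	spin_unlock(&my_lock);
 *	radix_tree_preload_end();
 */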

/*
 * Return the maximum key which can be stored in a
 * radix tree with height HEIGHT.
 */
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
	return height_to_maxindex[height];
}
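
/*
 * For example, with RADIX_TREE_MAP_SHIFT == 6 (the usual configuration),
 * a height-1 tree covers indices 0..63 and a height-2 tree covers
 * 0..4095; height 0 holds only index 0.  See __maxindex() at the bottom
 * of this file for the precomputation.
 */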

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root,
				unsigned long index, unsigned order)
{
	struct radix_tree_node *node;
	struct radix_tree_node *slot;
	unsigned int height;
	int tag;

	/* Figure out what the height should be.  */
	height = root->height + 1;
	while (index > radix_tree_maxindex(height))
		height++;

	if ((root->rnode == NULL) && (order == 0)) {
		root->height = height;
		goto out;
	}

	do {
		unsigned int newheight;
		if (!(node = radix_tree_node_alloc(root)))
			return -ENOMEM;

		/* Propagate the aggregated tag info into the new root */
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
			if (root_tag_get(root, tag))
				tag_set(node, tag, 0);
		}

		/* Increase the height.  */
		newheight = root->height + 1;
		BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK);
		node->path = newheight;
		node->count = 1;
		node->parent = NULL;
		slot = root->rnode;
		if (radix_tree_is_indirect_ptr(slot) && newheight > 1) {
			slot = indirect_to_ptr(slot);
			slot->parent = node;
			slot = ptr_to_indirect(slot);
		}
		node->slots[0] = slot;
		node = ptr_to_indirect(node);
		rcu_assign_pointer(root->rnode, node);
		root->height = newheight;
	} while (height > root->height);
out:
	return 0;
}

/**
 *	__radix_tree_create	-	create a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@order:		index occupies 2^order aligned slots
 *	@nodep:		returns node
 *	@slotp:		returns slot
 *
 *	Create, if necessary, and return the node and slot for an item
 *	at position @index in the radix tree @root.
 *
 *	Until there is more than one item in the tree, no nodes are
 *	allocated and @root->rnode is used as a direct slot instead of
 *	pointing to a node, in which case *@nodep will be NULL.
 *
 *	Returns -ENOMEM, or 0 for success.
 */
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
			unsigned order, struct radix_tree_node **nodep,
			void ***slotp)
{
	struct radix_tree_node *node = NULL, *slot;
	unsigned int height, shift, offset;
	int error;

	BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT));

	/* Make sure the tree is high enough.  */
	if (index > radix_tree_maxindex(root->height)) {
		error = radix_tree_extend(root, index, order);
		if (error)
			return error;
	}

	slot = root->rnode;

	height = root->height;
	shift = height * RADIX_TREE_MAP_SHIFT;

	offset = 0;			/* uninitialised var warning */
	while (shift > order) {
		if (slot == NULL) {
			/* Have to add a child node.  */
			if (!(slot = radix_tree_node_alloc(root)))
				return -ENOMEM;
			slot->path = height;
			slot->parent = node;
			if (node) {
				rcu_assign_pointer(node->slots[offset],
							ptr_to_indirect(slot));
				node->count++;
				slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT;
			} else
				rcu_assign_pointer(root->rnode,
							ptr_to_indirect(slot));
		} else if (!radix_tree_is_indirect_ptr(slot))
			break;

		/* Go a level down */
		height--;
		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = indirect_to_ptr(slot);
		slot = node->slots[offset];
	}

	/* Insert pointers to the canonical entry */
	if ((shift - order) > 0) {
		int i, n = 1 << (shift - order);
		offset = offset & ~(n - 1);
		slot = ptr_to_indirect(&node->slots[offset]);
		for (i = 0; i < n; i++) {
			if (node->slots[offset + i])
				return -EEXIST;
		}

		for (i = 1; i < n; i++) {
			rcu_assign_pointer(node->slots[offset + i], slot);
			node->count++;
		}
	}

	if (nodep)
		*nodep = node;
	if (slotp)
		*slotp = node ? node->slots + offset : (void **)&root->rnode;
	return 0;
}

/**
 *	__radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@order:		key covers the 2^order indices around index
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
			unsigned order, void *item)
{
	struct radix_tree_node *node;
	void **slot;
	int error;

	BUG_ON(radix_tree_is_indirect_ptr(item));

	error = __radix_tree_create(root, index, order, &node, &slot);
	if (error)
		return error;
	if (*slot != NULL)
		return -EEXIST;
	rcu_assign_pointer(*slot, item);

	if (node) {
		node->count++;
		BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK));
		BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK));
	} else {
		BUG_ON(root_tag_get(root, 0));
		BUG_ON(root_tag_get(root, 1));
	}

	return 0;
}
EXPORT_SYMBOL(__radix_tree_insert);
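
/*
 * Note: most callers go through the radix_tree_insert() wrapper in
 * <linux/radix-tree.h>, which is equivalent to calling this with order 0.
 * Sketch of the common pattern (my_tree and item are hypothetical):
 *
 *	error = radix_tree_insert(&my_tree, index, item);
 *	if (error == -EEXIST)
 *		... handle the already-present entry ...
 */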

/**
 *	__radix_tree_lookup	-	lookup an item in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@nodep:		returns node
 *	@slotp:		returns slot
 *
 *	Lookup and return the item at position @index in the radix
 *	tree @root.
 *
 *	Until there is more than one item in the tree, no nodes are
 *	allocated and @root->rnode is used as a direct slot instead of
 *	pointing to a node, in which case *@nodep will be NULL.
 */
void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
			  struct radix_tree_node **nodep, void ***slotp)
{
	struct radix_tree_node *node, *parent;
	unsigned int height, shift;
	void **slot;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return NULL;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (index > 0)
			return NULL;

		if (nodep)
			*nodep = NULL;
		if (slotp)
			*slotp = (void **)&root->rnode;
		return node;
	}
	node = indirect_to_ptr(node);

	height = node->path & RADIX_TREE_HEIGHT_MASK;
	if (index > radix_tree_maxindex(height))
		return NULL;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	do {
		parent = node;
		slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK);
		node = rcu_dereference_raw(*slot);
		if (node == NULL)
			return NULL;
		if (!radix_tree_is_indirect_ptr(node))
			break;
		node = indirect_to_ptr(node);

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	if (nodep)
		*nodep = parent;
	if (slotp)
		*slotp = slot;
	return node;
}

/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns the slot corresponding to the position @index in the
 *	radix tree @root.  This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers.  Any dereference of the slot must be
 *	done using radix_tree_deref_slot.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{
	void **slot;

	if (!__radix_tree_lookup(root, index, NULL, &slot))
		return NULL;
	return slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely).  No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
	return __radix_tree_lookup(root, index, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_lookup);
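
/*
 * Illustrative lookup patterns (my_tree, my_lock and new_item are
 * hypothetical).  A plain RCU-protected lookup:
 *
 *	rcu_read_lock();
 *	item = radix_tree_lookup(&my_tree, index);
 *	rcu_read_unlock();
 *
 * An update-if-exists using the slot interface, under the tree lock:
 *
 *	spin_lock(&my_lock);
 *	slot = radix_tree_lookup_slot(&my_tree, index);
 *	if (slot)
 *		radix_tree_replace_slot(slot, new_item);
 *	spin_unlock(&my_lock);
 */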

/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS) on the item
 *	corresponding to @index in the radix tree, from the root all the way
 *	down to the leaf node.
 *
 *	Returns the address of the tagged item.  Setting a tag on a
 *	not-present item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *slot;

	height = root->height;
	BUG_ON(index > radix_tree_maxindex(height));

	slot = indirect_to_ptr(root->rnode);
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(slot, tag, offset))
			tag_set(slot, tag, offset);
		slot = slot->slots[offset];
		BUG_ON(slot == NULL);
		if (!radix_tree_is_indirect_ptr(slot))
			break;
		slot = indirect_to_ptr(slot);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	/* set the root's tag bit */
	if (slot && !root_tag_get(root, tag))
		root_tag_set(root, tag);

	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_set);

/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If this causes the leaf
 *	node to have no tags set then clear the tag in the next-to-leaf node,
 *	etc.
 *
 *	Returns the address of the tagged item on success, else NULL.  ie:
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot = NULL;
	unsigned int height, shift;
	int uninitialized_var(offset);

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	shift = height * RADIX_TREE_MAP_SHIFT;
	slot = root->rnode;

	while (shift) {
		if (slot == NULL)
			goto out;
		if (!radix_tree_is_indirect_ptr(slot))
			break;
		slot = indirect_to_ptr(slot);

		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = slot->slots[offset];
	}

	if (slot == NULL)
		goto out;

	while (node) {
		if (!tag_get(node, tag, offset))
			goto out;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			goto out;

		index >>= RADIX_TREE_MAP_SHIFT;
		offset = index & RADIX_TREE_MAP_MASK;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
 * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are
 * excluded from concurrency.
 */
int radix_tree_tag_get(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *node;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return 0;

	if (!radix_tree_is_indirect_ptr(node))
		return (index == 0);
	node = indirect_to_ptr(node);

	height = node->path & RADIX_TREE_HEIGHT_MASK;
	if (index > radix_tree_maxindex(height))
		return 0;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; ; ) {
		int offset;

		if (node == NULL)
			return 0;
		node = indirect_to_ptr(node);

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(node, tag, offset))
			return 0;
		if (height == 1)
			return 1;
		node = rcu_dereference_raw(node->slots[offset]);
		if (!radix_tree_is_indirect_ptr(node))
			return 1;
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
}
EXPORT_SYMBOL(radix_tree_tag_get);
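
/*
 * Tag usage sketch (MY_TAG is a hypothetical caller-defined index below
 * RADIX_TREE_MAX_TAGS; the page cache, for instance, uses its tag slots
 * for dirty/writeback tracking):
 *
 *	radix_tree_tag_set(&my_tree, index, MY_TAG);
 *	if (radix_tree_tag_get(&my_tree, index, MY_TAG))
 *		... entry is tagged ...
 *	radix_tree_tag_clear(&my_tree, index, MY_TAG);
 */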

/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void **radix_tree_next_chunk(struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *rnode, *node;
	unsigned long index, offset, height;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL.  iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition is also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and to forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

	rnode = rcu_dereference_raw(root->rnode);
	if (radix_tree_is_indirect_ptr(rnode)) {
		rnode = indirect_to_ptr(rnode);
	} else if (rnode && !index) {
		/* Single-slot tree */
		iter->index = 0;
		iter->next_index = 1;
		iter->tags = 1;
		return (void **)&root->rnode;
	} else
		return NULL;

restart:
	height = rnode->path & RADIX_TREE_HEIGHT_MASK;
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	offset = index >> shift;

	/* Index outside of the tree */
	if (offset >= RADIX_TREE_MAP_SIZE)
		return NULL;

	node = rnode;
	while (1) {
		struct radix_tree_node *slot;
		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!test_bit(offset, node->tags[tag]) :
				!node->slots[offset]) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(
						node->tags[tag],
						RADIX_TREE_MAP_SIZE,
						offset + 1);
			else
				while (++offset < RADIX_TREE_MAP_SIZE) {
					if (node->slots[offset])
						break;
				}
			index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
			index += offset << shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
		}

		/* This is a leaf node */
		if (!shift)
			break;

		slot = rcu_dereference_raw(node->slots[offset]);
		if (slot == NULL)
			goto restart;
		if (!radix_tree_is_indirect_ptr(slot))
			break;
		node = indirect_to_ptr(slot);
		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
	}

	/* Update the iterator state */
	iter->index = index;
	iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;

	/* Construct iter->tags bit-mask from node->tags[tag] array */
	if (flags & RADIX_TREE_ITER_TAGGED) {
		unsigned tag_long, tag_bit;

		tag_long = offset / BITS_PER_LONG;
		tag_bit  = offset % BITS_PER_LONG;
		iter->tags = node->tags[tag][tag_long] >> tag_bit;
		/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
		if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
			/* Pick tags from next element */
			if (tag_bit)
				iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
			/* Clip chunk size, here only BITS_PER_LONG tags */
			iter->next_index = index + BITS_PER_LONG;
		}
	}

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);
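
/*
 * The chunk interface above is normally consumed through the iterator
 * macros from <linux/radix-tree.h>.  A minimal sketch (my_tree is
 * hypothetical):
 *
 *	struct radix_tree_iter iter;
 *	void **slot;
 *
 *	rcu_read_lock();
 *	radix_tree_for_each_slot(slot, &my_tree, &iter, 0) {
 *		void *item = radix_tree_deref_slot(slot);
 *		... use item at iter.index ...
 *	}
 *	rcu_read_unlock();
 */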

/**
 * radix_tree_range_tag_if_tagged - for each item in given range set given
 *				    tag if item has another tag set
 * @root:		radix tree root
 * @first_indexp:	pointer to a starting index of a range to scan
 * @last_index:		last index of a range to scan
 * @nr_to_tag:		maximum number of items to tag
 * @iftag:		tag index to test
 * @settag:		tag index to set if tested tag is set
 *
 * This function scans the range of the radix tree from first_index to
 * last_index (inclusive).  For each item in the range, if iftag is set, the
 * function also sets settag.  The function stops either after tagging
 * nr_to_tag items or after reaching last_index.
 *
 * The tags must be set from the leaf level only and propagated back up the
 * path to the root.  We must do this so that we resolve the full path before
 * setting any tags on intermediate nodes.  If we set tags as we descend,
 * then we can get to the leaf node and find that the index that has the
 * iftag set is outside the range we are scanning.  This results in dangling
 * tags and can lead to problems with later tag operations (e.g. livelocks
 * on lookups).
 *
 * The function returns the number of leaves where the tag was set and sets
 * *first_indexp to the first unscanned index.
 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX.  Caller must
 * be prepared to handle that.
 */
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
		unsigned long *first_indexp, unsigned long last_index,
		unsigned long nr_to_tag,
		unsigned int iftag, unsigned int settag)
{
	unsigned int height = root->height;
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot;
	unsigned int shift;
	unsigned long tagged = 0;
	unsigned long index = *first_indexp;

	last_index = min(last_index, radix_tree_maxindex(height));
	if (index > last_index)
		return 0;
	if (!nr_to_tag)
		return 0;
	if (!root_tag_get(root, iftag)) {
		*first_indexp = last_index + 1;
		return 0;
	}
	if (height == 0) {
		*first_indexp = last_index + 1;
		root_tag_set(root, settag);
		return 1;
	}

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	slot = indirect_to_ptr(root->rnode);

	for (;;) {
		unsigned long upindex;
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!slot->slots[offset])
			goto next;
		if (!tag_get(slot, iftag, offset))
			goto next;
		if (shift) {
			node = slot;
			slot = slot->slots[offset];
			if (radix_tree_is_indirect_ptr(slot)) {
				slot = indirect_to_ptr(slot);
				shift -= RADIX_TREE_MAP_SHIFT;
				continue;
			} else {
				slot = node;
				node = node->parent;
			}
		}

		/* tag the leaf */
		tagged += 1 << shift;
		tag_set(slot, settag, offset);

		/* walk back up the path tagging interior nodes */
		upindex = index;
		while (node) {
			upindex >>= RADIX_TREE_MAP_SHIFT;
			offset = upindex & RADIX_TREE_MAP_MASK;

			/* stop if we find a node with the tag already set */
			if (tag_get(node, settag, offset))
				break;
			tag_set(node, settag, offset);
			node = node->parent;
		}

		/*
		 * Small optimization: now clear that node pointer.
		 * Since all of this slot's ancestors now have the tag set
		 * from setting it above, we have no further need to walk
		 * back up the tree setting tags, until we update slot to
		 * point to another radix_tree_node.
		 */
		node = NULL;

next:
		/* Go to next item at level determined by 'shift' */
		index = ((index >> shift) + 1) << shift;
		/* Overflow can happen when last_index is ~0UL... */
		if (index > last_index || !index)
			break;
		if (tagged >= nr_to_tag)
			break;
		while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
			/*
			 * We've fully scanned this node.  Go up.  Because
			 * last_index is guaranteed to be in the tree, what
			 * we do below cannot wander astray.
			 */
			slot = slot->parent;
			shift += RADIX_TREE_MAP_SHIFT;
		}
	}
	/*
	 * Only set the root tag if at least one entry in the range from
	 * *first_indexp to last_index actually got tagged with settag.
	 */
	if (tagged > 0)
		root_tag_set(root, settag);
	*first_indexp = index;

	return tagged;
}
EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
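
/*
 * Illustrative use, modelled on page-cache writeback (the tag names are
 * the page cache's, shown only as an example): retag up to 64 dirty
 * entries, starting at start, as "to write":
 *
 *	unsigned long start = 0;
 *	radix_tree_range_tag_if_tagged(&my_tree, &start, ULONG_MAX, 64,
 *			PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
 */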

/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.
 *	Places them at *@results and returns the number of items which were
 *	placed at *@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock.  In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the
 *	semantics of an RCU protected gang lookup are as though multiple
 *	radix_tree_lookups have been issued in individual locks, and results
 *	stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_indirect_ptr(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);
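
/*
 * Gang lookup sketch (the results array is caller-supplied; my_tree is
 * hypothetical):
 *
 *	void *results[16];
 *	unsigned int nr;
 *
 *	rcu_read_lock();
 *	nr = radix_tree_gang_lookup(&my_tree, results, 0, 16);
 *	rcu_read_unlock();
 */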

/**
 *	radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@indices:	where their indices should be placed (but usually NULL)
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.
 *	Places their slots at *@results and returns the number of items which
 *	were placed at *@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_gang_lookup as far as RCU and locking goes.  Slots
 *	must be dereferenced with radix_tree_deref_slot, and if using only RCU
 *	protection, radix_tree_deref_slot may fail, requiring a retry.
 */
unsigned int
radix_tree_gang_lookup_slot(struct radix_tree_root *root,
			void ***results, unsigned long *indices,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = slot;
		if (indices)
			indices[ret] = iter.index;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);

/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *				     based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the items at *@results and
 *	returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_indirect_ptr(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

/**
 *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *					  radix tree based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the slots at *@results and
 *	returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = slot;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
#include <linux/sched.h> /* for cond_resched() */

/*
 * This linear search is at present only useful to shmem_unuse_inode().
 */
static unsigned long __locate(struct radix_tree_node *slot, void *item,
			      unsigned long index, unsigned long *found_index)
{
	unsigned int shift, height;
	unsigned long i;

	height = slot->path & RADIX_TREE_HEIGHT_MASK;
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; height > 1; height--) {
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}

		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			goto out;
		if (!radix_tree_is_indirect_ptr(slot)) {
			if (slot == item) {
				*found_index = index + i;
				index = 0;
			} else {
				index += shift;
			}
			goto out;
		}
		slot = indirect_to_ptr(slot);
		shift -= RADIX_TREE_MAP_SHIFT;
	}

	/* Bottom level: check items */
	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
		if (slot->slots[i] == item) {
			*found_index = index + i;
			index = 0;
			goto out;
		}
	}
	index += RADIX_TREE_MAP_SIZE;
out:
	return index;
}

/**
 *	radix_tree_locate_item - search through radix tree for item
 *	@root:		radix tree root
 *	@item:		item to be found
 *
 *	Returns index where item was found, or -1 if not found.
 *	Caller must hold no lock (since this time-consuming function needs
 *	to be preemptible), and must check afterwards if item is still there.
 */
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = 0;
	unsigned long found_index = -1;

	do {
		rcu_read_lock();
		node = rcu_dereference_raw(root->rnode);
		if (!radix_tree_is_indirect_ptr(node)) {
			rcu_read_unlock();
			if (node == item)
				found_index = 0;
			break;
		}

		node = indirect_to_ptr(node);
		max_index = radix_tree_maxindex(node->path &
						RADIX_TREE_HEIGHT_MASK);
		if (cur_index > max_index) {
			rcu_read_unlock();
			break;
		}

		cur_index = __locate(node, item, cur_index, &found_index);
		rcu_read_unlock();
		cond_resched();
	} while (cur_index != 0 && cur_index <= max_index);

	return found_index;
}
#else
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	return -1;
}
#endif /* CONFIG_SHMEM && CONFIG_SWAP */

/**
 *	radix_tree_shrink    -    shrink height of a radix tree to minimal
 *	@root:		radix tree root
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
	while (root->height > 0) {
		struct radix_tree_node *to_free = root->rnode;
		struct radix_tree_node *slot;

		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
		to_free = indirect_to_ptr(to_free);

		/*
		 * If the candidate node has more than one child, or its
		 * child is not at the leftmost slot, or it is a multiorder
		 * entry, we cannot shrink.
		 */
		if (to_free->count != 1)
			break;
		slot = to_free->slots[0];
		if (!slot)
			break;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		if (root->height > 1) {
			if (!radix_tree_is_indirect_ptr(slot))
				break;

			slot = indirect_to_ptr(slot);
			slot->parent = NULL;
			slot = ptr_to_indirect(slot);
		}
		root->rnode = slot;
		root->height--;

		/*
		 * We have a dilemma here.  The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item.  However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it.  If the item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means
		 * it was concurrently deleted from pagecache so try the deref
		 * again.  Fortunately there is already a requirement for
		 * logic to retry the entire slot lookup -- the indirect
		 * pointer problem (replacing a direct root node with an
		 * indirect pointer also results in a stale slot).  So tag the
		 * slot as indirect to force callers to retry.
		 */
		if (root->height == 0)
			*((unsigned long *)&to_free->slots[0]) |=
						RADIX_TREE_INDIRECT_PTR;

		radix_tree_node_free(to_free);
	}
}

/**
 *	__radix_tree_delete_node    -    try to free node after clearing a slot
 *	@root:		radix tree root
 *	@node:		node containing @index
 *
 *	After clearing the slot at @index in @node from radix tree
 *	rooted at @root, call this function to attempt freeing the
 *	node and shrinking the tree.
 *
 *	Returns %true if @node was freed, %false otherwise.
 */
bool __radix_tree_delete_node(struct radix_tree_root *root,
			      struct radix_tree_node *node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			if (node == indirect_to_ptr(root->rnode)) {
				radix_tree_shrink(root);
				if (root->height == 0)
					deleted = true;
			}
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			unsigned int offset;

			offset = node->path >> RADIX_TREE_HEIGHT_SHIFT;
			parent->slots[offset] = NULL;
			parent->count--;
		} else {
			root_tag_clear_all(root);
			root->height = 0;
			root->rnode = NULL;
		}

		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
}

/**
 *	radix_tree_delete_item    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@item:		expected item
 *
 *	Remove @item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not present
 *	or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
			     unsigned long index, void *item)
{
	struct radix_tree_node *node;
	unsigned int offset, i;
	void **slot;
	void *entry;
	int tag;

	entry = __radix_tree_lookup(root, index, &node, &slot);
	if (!entry)
		return NULL;

	if (item && entry != item)
		return NULL;

	if (!node) {
		root_tag_clear_all(root);
		root->rnode = NULL;
		return entry;
	}

	offset = index & RADIX_TREE_MAP_MASK;

	/*
	 * Clear all tags associated with the item to be deleted.
	 * This way of doing it would be inefficient, but seldom is any
	 * tag set.
	 */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
		if (tag_get(node, tag, offset))
			radix_tree_tag_clear(root, index, tag);
	}

	/* Delete any sibling slots pointing to this slot */
	for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
		if (node->slots[offset + i] != ptr_to_indirect(slot))
			break;
		node->slots[offset + i] = NULL;
		node->count--;
	}
	node->slots[offset] = NULL;
	node->count--;

	__radix_tree_delete_node(root, node);

	return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);

/**
 *	radix_tree_delete    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Remove the item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not
 *	present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	return radix_tree_delete_item(root, index, NULL);
}
EXPORT_SYMBOL(radix_tree_delete);
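
/*
 * Deletion sketch (illustrative): remove the entry at index only if it
 * still holds the expected item:
 *
 *	old = radix_tree_delete_item(&my_tree, index, item);
 *	if (old != item)
 *		... it was already removed or replaced by someone else ...
 */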

/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 */
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

static void
radix_tree_node_ctor(void *arg)
{
	struct radix_tree_node *node = arg;

	memset(node, 0, sizeof(*node));
	INIT_LIST_HEAD(&node->private_list);
}

static __init unsigned long __maxindex(unsigned int height)
{
	unsigned int width = height * RADIX_TREE_MAP_SHIFT;
	int shift = RADIX_TREE_INDEX_BITS - width;

	if (shift < 0)
		return ~0UL;
	if (shift >= BITS_PER_LONG)
		return 0UL;
	return ~0UL >> shift;
}

static __init void radix_tree_init_maxindex(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
		height_to_maxindex[i] = __maxindex(i);
}

static int radix_tree_callback(struct notifier_block *nfb,
			       unsigned long action, void *hcpu)
{
	int cpu = (long)hcpu;
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;

	/* Free the per-cpu pool of preloaded nodes */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		rtp = &per_cpu(radix_tree_preloads, cpu);
		while (rtp->nr) {
			node = rtp->nodes;
			rtp->nodes = node->private_data;
			kmem_cache_free(radix_tree_node_cachep, node);
			rtp->nr--;
		}
	}
	return NOTIFY_OK;
}

void __init radix_tree_init(void)
{
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	radix_tree_init_maxindex();
	hotcpu_notifier(radix_tree_callback, 0);
}