1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2001 Momchil Velikov 4 * Portions Copyright (C) 2001 Christoph Hellwig 5 * Copyright (C) 2005 SGI, Christoph Lameter 6 * Copyright (C) 2006 Nick Piggin 7 * Copyright (C) 2012 Konstantin Khlebnikov 8 * Copyright (C) 2016 Intel, Matthew Wilcox 9 * Copyright (C) 2016 Intel, Ross Zwisler 10 */ 11 12 #include <linux/bitmap.h> 13 #include <linux/bitops.h> 14 #include <linux/bug.h> 15 #include <linux/cpu.h> 16 #include <linux/errno.h> 17 #include <linux/export.h> 18 #include <linux/idr.h> 19 #include <linux/init.h> 20 #include <linux/kernel.h> 21 #include <linux/kmemleak.h> 22 #include <linux/percpu.h> 23 #include <linux/preempt.h> /* in_interrupt() */ 24 #include <linux/radix-tree.h> 25 #include <linux/rcupdate.h> 26 #include <linux/slab.h> 27 #include <linux/string.h> 28 #include <linux/xarray.h> 29 30 31 /* 32 * Radix tree node cache. 33 */ 34 struct kmem_cache *radix_tree_node_cachep; 35 36 /* 37 * The radix tree is variable-height, so an insert operation not only has 38 * to build the branch to its corresponding item, it also has to build the 39 * branch to existing items if the size has to be increased (by 40 * radix_tree_extend). 41 * 42 * The worst case is a zero height tree with just a single item at index 0, 43 * and then inserting an item at index ULONG_MAX. This requires 2 new branches 44 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. 45 * Hence: 46 */ 47 #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) 48 49 /* 50 * The IDR does not have to be as high as the radix tree since it uses 51 * signed integers, not unsigned longs. 52 */ 53 #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) 54 #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ 55 RADIX_TREE_MAP_SHIFT)) 56 #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) 57 58 /* 59 * The IDA is even shorter since it uses a bitmap at the last level. 60 */ 61 #define IDA_INDEX_BITS (8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS)) 62 #define IDA_MAX_PATH (DIV_ROUND_UP(IDA_INDEX_BITS, \ 63 RADIX_TREE_MAP_SHIFT)) 64 #define IDA_PRELOAD_SIZE (IDA_MAX_PATH * 2 - 1) 65 66 /* 67 * Per-cpu pool of preloaded nodes 68 */ 69 struct radix_tree_preload { 70 unsigned nr; 71 /* nodes->parent points to next preallocated node */ 72 struct radix_tree_node *nodes; 73 }; 74 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 75 76 static inline struct radix_tree_node *entry_to_node(void *ptr) 77 { 78 return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); 79 } 80 81 static inline void *node_to_entry(void *ptr) 82 { 83 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 84 } 85 86 #define RADIX_TREE_RETRY XA_RETRY_ENTRY 87 88 static inline unsigned long 89 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) 90 { 91 return parent ? slot - parent->slots : 0; 92 } 93 94 static unsigned int radix_tree_descend(const struct radix_tree_node *parent, 95 struct radix_tree_node **nodep, unsigned long index) 96 { 97 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; 98 void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); 99 100 *nodep = (void *)entry; 101 return offset; 102 } 103 104 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) 105 { 106 return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK); 107 } 108 109 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 110 int offset) 111 { 112 __set_bit(offset, node->tags[tag]); 113 } 114 115 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 116 int offset) 117 { 118 __clear_bit(offset, node->tags[tag]); 119 } 120 121 static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, 122 int offset) 123 { 124 return test_bit(offset, node->tags[tag]); 125 } 126 127 static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) 128 { 129 root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); 130 } 131 132 static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) 133 { 134 root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); 135 } 136 137 static inline void root_tag_clear_all(struct radix_tree_root *root) 138 { 139 root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1); 140 } 141 142 static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) 143 { 144 return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT)); 145 } 146 147 static inline unsigned root_tags_get(const struct radix_tree_root *root) 148 { 149 return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT; 150 } 151 152 static inline bool is_idr(const struct radix_tree_root *root) 153 { 154 return !!(root->xa_flags & ROOT_IS_IDR); 155 } 156 157 /* 158 * Returns 1 if any slot in the node has this tag set. 159 * Otherwise returns 0. 160 */ 161 static inline int any_tag_set(const struct radix_tree_node *node, 162 unsigned int tag) 163 { 164 unsigned idx; 165 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 166 if (node->tags[tag][idx]) 167 return 1; 168 } 169 return 0; 170 } 171 172 static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) 173 { 174 bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); 175 } 176 177 /** 178 * radix_tree_find_next_bit - find the next set bit in a memory region 179 * 180 * @addr: The address to base the search on 181 * @size: The bitmap size in bits 182 * @offset: The bitnumber to start searching at 183 * 184 * Unrollable variant of find_next_bit() for constant size arrays. 185 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. 186 * Returns next bit offset, or size if nothing found. 187 */ 188 static __always_inline unsigned long 189 radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, 190 unsigned long offset) 191 { 192 const unsigned long *addr = node->tags[tag]; 193 194 if (offset < RADIX_TREE_MAP_SIZE) { 195 unsigned long tmp; 196 197 addr += offset / BITS_PER_LONG; 198 tmp = *addr >> (offset % BITS_PER_LONG); 199 if (tmp) 200 return __ffs(tmp) + offset; 201 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); 202 while (offset < RADIX_TREE_MAP_SIZE) { 203 tmp = *++addr; 204 if (tmp) 205 return __ffs(tmp) + offset; 206 offset += BITS_PER_LONG; 207 } 208 } 209 return RADIX_TREE_MAP_SIZE; 210 } 211 212 static unsigned int iter_offset(const struct radix_tree_iter *iter) 213 { 214 return iter->index & RADIX_TREE_MAP_MASK; 215 } 216 217 /* 218 * The maximum index which can be stored in a radix tree 219 */ 220 static inline unsigned long shift_maxindex(unsigned int shift) 221 { 222 return (RADIX_TREE_MAP_SIZE << shift) - 1; 223 } 224 225 static inline unsigned long node_maxindex(const struct radix_tree_node *node) 226 { 227 return shift_maxindex(node->shift); 228 } 229 230 static unsigned long next_index(unsigned long index, 231 const struct radix_tree_node *node, 232 unsigned long offset) 233 { 234 return (index & ~node_maxindex(node)) + (offset << node->shift); 235 } 236 237 /* 238 * This assumes that the caller has performed appropriate preallocation, and 239 * that the caller has pinned this thread of control to the current CPU. 240 */ 241 static struct radix_tree_node * 242 radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, 243 struct radix_tree_root *root, 244 unsigned int shift, unsigned int offset, 245 unsigned int count, unsigned int nr_values) 246 { 247 struct radix_tree_node *ret = NULL; 248 249 /* 250 * Preload code isn't irq safe and it doesn't make sense to use 251 * preloading during an interrupt anyway as all the allocations have 252 * to be atomic. So just do normal allocation when in interrupt. 253 */ 254 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { 255 struct radix_tree_preload *rtp; 256 257 /* 258 * Even if the caller has preloaded, try to allocate from the 259 * cache first for the new node to get accounted to the memory 260 * cgroup. 261 */ 262 ret = kmem_cache_alloc(radix_tree_node_cachep, 263 gfp_mask | __GFP_NOWARN); 264 if (ret) 265 goto out; 266 267 /* 268 * Provided the caller has preloaded here, we will always 269 * succeed in getting a node here (and never reach 270 * kmem_cache_alloc) 271 */ 272 rtp = this_cpu_ptr(&radix_tree_preloads); 273 if (rtp->nr) { 274 ret = rtp->nodes; 275 rtp->nodes = ret->parent; 276 rtp->nr--; 277 } 278 /* 279 * Update the allocation stack trace as this is more useful 280 * for debugging. 281 */ 282 kmemleak_update_trace(ret); 283 goto out; 284 } 285 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 286 out: 287 BUG_ON(radix_tree_is_internal_node(ret)); 288 if (ret) { 289 ret->shift = shift; 290 ret->offset = offset; 291 ret->count = count; 292 ret->nr_values = nr_values; 293 ret->parent = parent; 294 ret->array = root; 295 } 296 return ret; 297 } 298 299 void radix_tree_node_rcu_free(struct rcu_head *head) 300 { 301 struct radix_tree_node *node = 302 container_of(head, struct radix_tree_node, rcu_head); 303 304 /* 305 * Must only free zeroed nodes into the slab. We can be left with 306 * non-NULL entries by radix_tree_free_nodes, so clear the entries 307 * and tags here. 308 */ 309 memset(node->slots, 0, sizeof(node->slots)); 310 memset(node->tags, 0, sizeof(node->tags)); 311 INIT_LIST_HEAD(&node->private_list); 312 313 kmem_cache_free(radix_tree_node_cachep, node); 314 } 315 316 static inline void 317 radix_tree_node_free(struct radix_tree_node *node) 318 { 319 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 320 } 321 322 /* 323 * Load up this CPU's radix_tree_node buffer with sufficient objects to 324 * ensure that the addition of a single element in the tree cannot fail. On 325 * success, return zero, with preemption disabled. On error, return -ENOMEM 326 * with preemption not disabled. 327 * 328 * To make use of this facility, the radix tree must be initialised without 329 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 330 */ 331 static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) 332 { 333 struct radix_tree_preload *rtp; 334 struct radix_tree_node *node; 335 int ret = -ENOMEM; 336 337 /* 338 * Nodes preloaded by one cgroup can be be used by another cgroup, so 339 * they should never be accounted to any particular memory cgroup. 340 */ 341 gfp_mask &= ~__GFP_ACCOUNT; 342 343 preempt_disable(); 344 rtp = this_cpu_ptr(&radix_tree_preloads); 345 while (rtp->nr < nr) { 346 preempt_enable(); 347 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 348 if (node == NULL) 349 goto out; 350 preempt_disable(); 351 rtp = this_cpu_ptr(&radix_tree_preloads); 352 if (rtp->nr < nr) { 353 node->parent = rtp->nodes; 354 rtp->nodes = node; 355 rtp->nr++; 356 } else { 357 kmem_cache_free(radix_tree_node_cachep, node); 358 } 359 } 360 ret = 0; 361 out: 362 return ret; 363 } 364 365 /* 366 * Load up this CPU's radix_tree_node buffer with sufficient objects to 367 * ensure that the addition of a single element in the tree cannot fail. On 368 * success, return zero, with preemption disabled. On error, return -ENOMEM 369 * with preemption not disabled. 370 * 371 * To make use of this facility, the radix tree must be initialised without 372 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 373 */ 374 int radix_tree_preload(gfp_t gfp_mask) 375 { 376 /* Warn on non-sensical use... */ 377 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); 378 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 379 } 380 EXPORT_SYMBOL(radix_tree_preload); 381 382 /* 383 * The same as above function, except we don't guarantee preloading happens. 384 * We do it, if we decide it helps. On success, return zero with preemption 385 * disabled. On error, return -ENOMEM with preemption not disabled. 386 */ 387 int radix_tree_maybe_preload(gfp_t gfp_mask) 388 { 389 if (gfpflags_allow_blocking(gfp_mask)) 390 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 391 /* Preloading doesn't help anything with this gfp mask, skip it */ 392 preempt_disable(); 393 return 0; 394 } 395 EXPORT_SYMBOL(radix_tree_maybe_preload); 396 397 static unsigned radix_tree_load_root(const struct radix_tree_root *root, 398 struct radix_tree_node **nodep, unsigned long *maxindex) 399 { 400 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 401 402 *nodep = node; 403 404 if (likely(radix_tree_is_internal_node(node))) { 405 node = entry_to_node(node); 406 *maxindex = node_maxindex(node); 407 return node->shift + RADIX_TREE_MAP_SHIFT; 408 } 409 410 *maxindex = 0; 411 return 0; 412 } 413 414 /* 415 * Extend a radix tree so it can store key @index. 416 */ 417 static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, 418 unsigned long index, unsigned int shift) 419 { 420 void *entry; 421 unsigned int maxshift; 422 int tag; 423 424 /* Figure out what the shift should be. */ 425 maxshift = shift; 426 while (index > shift_maxindex(maxshift)) 427 maxshift += RADIX_TREE_MAP_SHIFT; 428 429 entry = rcu_dereference_raw(root->xa_head); 430 if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) 431 goto out; 432 433 do { 434 struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, 435 root, shift, 0, 1, 0); 436 if (!node) 437 return -ENOMEM; 438 439 if (is_idr(root)) { 440 all_tag_set(node, IDR_FREE); 441 if (!root_tag_get(root, IDR_FREE)) { 442 tag_clear(node, IDR_FREE, 0); 443 root_tag_set(root, IDR_FREE); 444 } 445 } else { 446 /* Propagate the aggregated tag info to the new child */ 447 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 448 if (root_tag_get(root, tag)) 449 tag_set(node, tag, 0); 450 } 451 } 452 453 BUG_ON(shift > BITS_PER_LONG); 454 if (radix_tree_is_internal_node(entry)) { 455 entry_to_node(entry)->parent = node; 456 } else if (xa_is_value(entry)) { 457 /* Moving a value entry root->xa_head to a node */ 458 node->nr_values = 1; 459 } 460 /* 461 * entry was already in the radix tree, so we do not need 462 * rcu_assign_pointer here 463 */ 464 node->slots[0] = (void __rcu *)entry; 465 entry = node_to_entry(node); 466 rcu_assign_pointer(root->xa_head, entry); 467 shift += RADIX_TREE_MAP_SHIFT; 468 } while (shift <= maxshift); 469 out: 470 return maxshift + RADIX_TREE_MAP_SHIFT; 471 } 472 473 /** 474 * radix_tree_shrink - shrink radix tree to minimum height 475 * @root radix tree root 476 */ 477 static inline bool radix_tree_shrink(struct radix_tree_root *root) 478 { 479 bool shrunk = false; 480 481 for (;;) { 482 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 483 struct radix_tree_node *child; 484 485 if (!radix_tree_is_internal_node(node)) 486 break; 487 node = entry_to_node(node); 488 489 /* 490 * The candidate node has more than one child, or its child 491 * is not at the leftmost slot, we cannot shrink. 492 */ 493 if (node->count != 1) 494 break; 495 child = rcu_dereference_raw(node->slots[0]); 496 if (!child) 497 break; 498 499 /* 500 * For an IDR, we must not shrink entry 0 into the root in 501 * case somebody calls idr_replace() with a pointer that 502 * appears to be an internal entry 503 */ 504 if (!node->shift && is_idr(root)) 505 break; 506 507 if (radix_tree_is_internal_node(child)) 508 entry_to_node(child)->parent = NULL; 509 510 /* 511 * We don't need rcu_assign_pointer(), since we are simply 512 * moving the node from one part of the tree to another: if it 513 * was safe to dereference the old pointer to it 514 * (node->slots[0]), it will be safe to dereference the new 515 * one (root->xa_head) as far as dependent read barriers go. 516 */ 517 root->xa_head = (void __rcu *)child; 518 if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) 519 root_tag_clear(root, IDR_FREE); 520 521 /* 522 * We have a dilemma here. The node's slot[0] must not be 523 * NULLed in case there are concurrent lookups expecting to 524 * find the item. However if this was a bottom-level node, 525 * then it may be subject to the slot pointer being visible 526 * to callers dereferencing it. If item corresponding to 527 * slot[0] is subsequently deleted, these callers would expect 528 * their slot to become empty sooner or later. 529 * 530 * For example, lockless pagecache will look up a slot, deref 531 * the page pointer, and if the page has 0 refcount it means it 532 * was concurrently deleted from pagecache so try the deref 533 * again. Fortunately there is already a requirement for logic 534 * to retry the entire slot lookup -- the indirect pointer 535 * problem (replacing direct root node with an indirect pointer 536 * also results in a stale slot). So tag the slot as indirect 537 * to force callers to retry. 538 */ 539 node->count = 0; 540 if (!radix_tree_is_internal_node(child)) { 541 node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; 542 } 543 544 WARN_ON_ONCE(!list_empty(&node->private_list)); 545 radix_tree_node_free(node); 546 shrunk = true; 547 } 548 549 return shrunk; 550 } 551 552 static bool delete_node(struct radix_tree_root *root, 553 struct radix_tree_node *node) 554 { 555 bool deleted = false; 556 557 do { 558 struct radix_tree_node *parent; 559 560 if (node->count) { 561 if (node_to_entry(node) == 562 rcu_dereference_raw(root->xa_head)) 563 deleted |= radix_tree_shrink(root); 564 return deleted; 565 } 566 567 parent = node->parent; 568 if (parent) { 569 parent->slots[node->offset] = NULL; 570 parent->count--; 571 } else { 572 /* 573 * Shouldn't the tags already have all been cleared 574 * by the caller? 575 */ 576 if (!is_idr(root)) 577 root_tag_clear_all(root); 578 root->xa_head = NULL; 579 } 580 581 WARN_ON_ONCE(!list_empty(&node->private_list)); 582 radix_tree_node_free(node); 583 deleted = true; 584 585 node = parent; 586 } while (node); 587 588 return deleted; 589 } 590 591 /** 592 * __radix_tree_create - create a slot in a radix tree 593 * @root: radix tree root 594 * @index: index key 595 * @nodep: returns node 596 * @slotp: returns slot 597 * 598 * Create, if necessary, and return the node and slot for an item 599 * at position @index in the radix tree @root. 600 * 601 * Until there is more than one item in the tree, no nodes are 602 * allocated and @root->xa_head is used as a direct slot instead of 603 * pointing to a node, in which case *@nodep will be NULL. 604 * 605 * Returns -ENOMEM, or 0 for success. 606 */ 607 static int __radix_tree_create(struct radix_tree_root *root, 608 unsigned long index, struct radix_tree_node **nodep, 609 void __rcu ***slotp) 610 { 611 struct radix_tree_node *node = NULL, *child; 612 void __rcu **slot = (void __rcu **)&root->xa_head; 613 unsigned long maxindex; 614 unsigned int shift, offset = 0; 615 unsigned long max = index; 616 gfp_t gfp = root_gfp_mask(root); 617 618 shift = radix_tree_load_root(root, &child, &maxindex); 619 620 /* Make sure the tree is high enough. */ 621 if (max > maxindex) { 622 int error = radix_tree_extend(root, gfp, max, shift); 623 if (error < 0) 624 return error; 625 shift = error; 626 child = rcu_dereference_raw(root->xa_head); 627 } 628 629 while (shift > 0) { 630 shift -= RADIX_TREE_MAP_SHIFT; 631 if (child == NULL) { 632 /* Have to add a child node. */ 633 child = radix_tree_node_alloc(gfp, node, root, shift, 634 offset, 0, 0); 635 if (!child) 636 return -ENOMEM; 637 rcu_assign_pointer(*slot, node_to_entry(child)); 638 if (node) 639 node->count++; 640 } else if (!radix_tree_is_internal_node(child)) 641 break; 642 643 /* Go a level down */ 644 node = entry_to_node(child); 645 offset = radix_tree_descend(node, &child, index); 646 slot = &node->slots[offset]; 647 } 648 649 if (nodep) 650 *nodep = node; 651 if (slotp) 652 *slotp = slot; 653 return 0; 654 } 655 656 /* 657 * Free any nodes below this node. The tree is presumed to not need 658 * shrinking, and any user data in the tree is presumed to not need a 659 * destructor called on it. If we need to add a destructor, we can 660 * add that functionality later. Note that we may not clear tags or 661 * slots from the tree as an RCU walker may still have a pointer into 662 * this subtree. We could replace the entries with RADIX_TREE_RETRY, 663 * but we'll still have to clear those in rcu_free. 664 */ 665 static void radix_tree_free_nodes(struct radix_tree_node *node) 666 { 667 unsigned offset = 0; 668 struct radix_tree_node *child = entry_to_node(node); 669 670 for (;;) { 671 void *entry = rcu_dereference_raw(child->slots[offset]); 672 if (xa_is_node(entry) && child->shift) { 673 child = entry_to_node(entry); 674 offset = 0; 675 continue; 676 } 677 offset++; 678 while (offset == RADIX_TREE_MAP_SIZE) { 679 struct radix_tree_node *old = child; 680 offset = child->offset + 1; 681 child = child->parent; 682 WARN_ON_ONCE(!list_empty(&old->private_list)); 683 radix_tree_node_free(old); 684 if (old == entry_to_node(node)) 685 return; 686 } 687 } 688 } 689 690 static inline int insert_entries(struct radix_tree_node *node, 691 void __rcu **slot, void *item, bool replace) 692 { 693 if (*slot) 694 return -EEXIST; 695 rcu_assign_pointer(*slot, item); 696 if (node) { 697 node->count++; 698 if (xa_is_value(item)) 699 node->nr_values++; 700 } 701 return 1; 702 } 703 704 /** 705 * __radix_tree_insert - insert into a radix tree 706 * @root: radix tree root 707 * @index: index key 708 * @item: item to insert 709 * 710 * Insert an item into the radix tree at position @index. 711 */ 712 int radix_tree_insert(struct radix_tree_root *root, unsigned long index, 713 void *item) 714 { 715 struct radix_tree_node *node; 716 void __rcu **slot; 717 int error; 718 719 BUG_ON(radix_tree_is_internal_node(item)); 720 721 error = __radix_tree_create(root, index, &node, &slot); 722 if (error) 723 return error; 724 725 error = insert_entries(node, slot, item, false); 726 if (error < 0) 727 return error; 728 729 if (node) { 730 unsigned offset = get_slot_offset(node, slot); 731 BUG_ON(tag_get(node, 0, offset)); 732 BUG_ON(tag_get(node, 1, offset)); 733 BUG_ON(tag_get(node, 2, offset)); 734 } else { 735 BUG_ON(root_tags_get(root)); 736 } 737 738 return 0; 739 } 740 EXPORT_SYMBOL(radix_tree_insert); 741 742 /** 743 * __radix_tree_lookup - lookup an item in a radix tree 744 * @root: radix tree root 745 * @index: index key 746 * @nodep: returns node 747 * @slotp: returns slot 748 * 749 * Lookup and return the item at position @index in the radix 750 * tree @root. 751 * 752 * Until there is more than one item in the tree, no nodes are 753 * allocated and @root->xa_head is used as a direct slot instead of 754 * pointing to a node, in which case *@nodep will be NULL. 755 */ 756 void *__radix_tree_lookup(const struct radix_tree_root *root, 757 unsigned long index, struct radix_tree_node **nodep, 758 void __rcu ***slotp) 759 { 760 struct radix_tree_node *node, *parent; 761 unsigned long maxindex; 762 void __rcu **slot; 763 764 restart: 765 parent = NULL; 766 slot = (void __rcu **)&root->xa_head; 767 radix_tree_load_root(root, &node, &maxindex); 768 if (index > maxindex) 769 return NULL; 770 771 while (radix_tree_is_internal_node(node)) { 772 unsigned offset; 773 774 parent = entry_to_node(node); 775 offset = radix_tree_descend(parent, &node, index); 776 slot = parent->slots + offset; 777 if (node == RADIX_TREE_RETRY) 778 goto restart; 779 if (parent->shift == 0) 780 break; 781 } 782 783 if (nodep) 784 *nodep = parent; 785 if (slotp) 786 *slotp = slot; 787 return node; 788 } 789 790 /** 791 * radix_tree_lookup_slot - lookup a slot in a radix tree 792 * @root: radix tree root 793 * @index: index key 794 * 795 * Returns: the slot corresponding to the position @index in the 796 * radix tree @root. This is useful for update-if-exists operations. 797 * 798 * This function can be called under rcu_read_lock iff the slot is not 799 * modified by radix_tree_replace_slot, otherwise it must be called 800 * exclusive from other writers. Any dereference of the slot must be done 801 * using radix_tree_deref_slot. 802 */ 803 void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, 804 unsigned long index) 805 { 806 void __rcu **slot; 807 808 if (!__radix_tree_lookup(root, index, NULL, &slot)) 809 return NULL; 810 return slot; 811 } 812 EXPORT_SYMBOL(radix_tree_lookup_slot); 813 814 /** 815 * radix_tree_lookup - perform lookup operation on a radix tree 816 * @root: radix tree root 817 * @index: index key 818 * 819 * Lookup the item at the position @index in the radix tree @root. 820 * 821 * This function can be called under rcu_read_lock, however the caller 822 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 823 * them safely). No RCU barriers are required to access or modify the 824 * returned item, however. 825 */ 826 void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) 827 { 828 return __radix_tree_lookup(root, index, NULL, NULL); 829 } 830 EXPORT_SYMBOL(radix_tree_lookup); 831 832 static void replace_slot(void __rcu **slot, void *item, 833 struct radix_tree_node *node, int count, int values) 834 { 835 if (node && (count || values)) { 836 node->count += count; 837 node->nr_values += values; 838 } 839 840 rcu_assign_pointer(*slot, item); 841 } 842 843 static bool node_tag_get(const struct radix_tree_root *root, 844 const struct radix_tree_node *node, 845 unsigned int tag, unsigned int offset) 846 { 847 if (node) 848 return tag_get(node, tag, offset); 849 return root_tag_get(root, tag); 850 } 851 852 /* 853 * IDR users want to be able to store NULL in the tree, so if the slot isn't 854 * free, don't adjust the count, even if it's transitioning between NULL and 855 * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still 856 * have empty bits, but it only stores NULL in slots when they're being 857 * deleted. 858 */ 859 static int calculate_count(struct radix_tree_root *root, 860 struct radix_tree_node *node, void __rcu **slot, 861 void *item, void *old) 862 { 863 if (is_idr(root)) { 864 unsigned offset = get_slot_offset(node, slot); 865 bool free = node_tag_get(root, node, IDR_FREE, offset); 866 if (!free) 867 return 0; 868 if (!old) 869 return 1; 870 } 871 return !!item - !!old; 872 } 873 874 /** 875 * __radix_tree_replace - replace item in a slot 876 * @root: radix tree root 877 * @node: pointer to tree node 878 * @slot: pointer to slot in @node 879 * @item: new item to store in the slot. 880 * 881 * For use with __radix_tree_lookup(). Caller must hold tree write locked 882 * across slot lookup and replacement. 883 */ 884 void __radix_tree_replace(struct radix_tree_root *root, 885 struct radix_tree_node *node, 886 void __rcu **slot, void *item) 887 { 888 void *old = rcu_dereference_raw(*slot); 889 int values = !!xa_is_value(item) - !!xa_is_value(old); 890 int count = calculate_count(root, node, slot, item, old); 891 892 /* 893 * This function supports replacing value entries and 894 * deleting entries, but that needs accounting against the 895 * node unless the slot is root->xa_head. 896 */ 897 WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && 898 (count || values)); 899 replace_slot(slot, item, node, count, values); 900 901 if (!node) 902 return; 903 904 delete_node(root, node); 905 } 906 907 /** 908 * radix_tree_replace_slot - replace item in a slot 909 * @root: radix tree root 910 * @slot: pointer to slot 911 * @item: new item to store in the slot. 912 * 913 * For use with radix_tree_lookup_slot() and 914 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked 915 * across slot lookup and replacement. 916 * 917 * NOTE: This cannot be used to switch between non-entries (empty slots), 918 * regular entries, and value entries, as that requires accounting 919 * inside the radix tree node. When switching from one type of entry or 920 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or 921 * radix_tree_iter_replace(). 922 */ 923 void radix_tree_replace_slot(struct radix_tree_root *root, 924 void __rcu **slot, void *item) 925 { 926 __radix_tree_replace(root, NULL, slot, item); 927 } 928 EXPORT_SYMBOL(radix_tree_replace_slot); 929 930 /** 931 * radix_tree_iter_replace - replace item in a slot 932 * @root: radix tree root 933 * @slot: pointer to slot 934 * @item: new item to store in the slot. 935 * 936 * For use with radix_tree_for_each_slot(). 937 * Caller must hold tree write locked. 938 */ 939 void radix_tree_iter_replace(struct radix_tree_root *root, 940 const struct radix_tree_iter *iter, 941 void __rcu **slot, void *item) 942 { 943 __radix_tree_replace(root, iter->node, slot, item); 944 } 945 946 static void node_tag_set(struct radix_tree_root *root, 947 struct radix_tree_node *node, 948 unsigned int tag, unsigned int offset) 949 { 950 while (node) { 951 if (tag_get(node, tag, offset)) 952 return; 953 tag_set(node, tag, offset); 954 offset = node->offset; 955 node = node->parent; 956 } 957 958 if (!root_tag_get(root, tag)) 959 root_tag_set(root, tag); 960 } 961 962 /** 963 * radix_tree_tag_set - set a tag on a radix tree node 964 * @root: radix tree root 965 * @index: index key 966 * @tag: tag index 967 * 968 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 969 * corresponding to @index in the radix tree. From 970 * the root all the way down to the leaf node. 971 * 972 * Returns the address of the tagged item. Setting a tag on a not-present 973 * item is a bug. 974 */ 975 void *radix_tree_tag_set(struct radix_tree_root *root, 976 unsigned long index, unsigned int tag) 977 { 978 struct radix_tree_node *node, *parent; 979 unsigned long maxindex; 980 981 radix_tree_load_root(root, &node, &maxindex); 982 BUG_ON(index > maxindex); 983 984 while (radix_tree_is_internal_node(node)) { 985 unsigned offset; 986 987 parent = entry_to_node(node); 988 offset = radix_tree_descend(parent, &node, index); 989 BUG_ON(!node); 990 991 if (!tag_get(parent, tag, offset)) 992 tag_set(parent, tag, offset); 993 } 994 995 /* set the root's tag bit */ 996 if (!root_tag_get(root, tag)) 997 root_tag_set(root, tag); 998 999 return node; 1000 } 1001 EXPORT_SYMBOL(radix_tree_tag_set); 1002 1003 static void node_tag_clear(struct radix_tree_root *root, 1004 struct radix_tree_node *node, 1005 unsigned int tag, unsigned int offset) 1006 { 1007 while (node) { 1008 if (!tag_get(node, tag, offset)) 1009 return; 1010 tag_clear(node, tag, offset); 1011 if (any_tag_set(node, tag)) 1012 return; 1013 1014 offset = node->offset; 1015 node = node->parent; 1016 } 1017 1018 /* clear the root's tag bit */ 1019 if (root_tag_get(root, tag)) 1020 root_tag_clear(root, tag); 1021 } 1022 1023 /** 1024 * radix_tree_tag_clear - clear a tag on a radix tree node 1025 * @root: radix tree root 1026 * @index: index key 1027 * @tag: tag index 1028 * 1029 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 1030 * corresponding to @index in the radix tree. If this causes 1031 * the leaf node to have no tags set then clear the tag in the 1032 * next-to-leaf node, etc. 1033 * 1034 * Returns the address of the tagged item on success, else NULL. ie: 1035 * has the same return value and semantics as radix_tree_lookup(). 1036 */ 1037 void *radix_tree_tag_clear(struct radix_tree_root *root, 1038 unsigned long index, unsigned int tag) 1039 { 1040 struct radix_tree_node *node, *parent; 1041 unsigned long maxindex; 1042 int uninitialized_var(offset); 1043 1044 radix_tree_load_root(root, &node, &maxindex); 1045 if (index > maxindex) 1046 return NULL; 1047 1048 parent = NULL; 1049 1050 while (radix_tree_is_internal_node(node)) { 1051 parent = entry_to_node(node); 1052 offset = radix_tree_descend(parent, &node, index); 1053 } 1054 1055 if (node) 1056 node_tag_clear(root, parent, tag, offset); 1057 1058 return node; 1059 } 1060 EXPORT_SYMBOL(radix_tree_tag_clear); 1061 1062 /** 1063 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry 1064 * @root: radix tree root 1065 * @iter: iterator state 1066 * @tag: tag to clear 1067 */ 1068 void radix_tree_iter_tag_clear(struct radix_tree_root *root, 1069 const struct radix_tree_iter *iter, unsigned int tag) 1070 { 1071 node_tag_clear(root, iter->node, tag, iter_offset(iter)); 1072 } 1073 1074 /** 1075 * radix_tree_tag_get - get a tag on a radix tree node 1076 * @root: radix tree root 1077 * @index: index key 1078 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 1079 * 1080 * Return values: 1081 * 1082 * 0: tag not present or not set 1083 * 1: tag set 1084 * 1085 * Note that the return value of this function may not be relied on, even if 1086 * the RCU lock is held, unless tag modification and node deletion are excluded 1087 * from concurrency. 1088 */ 1089 int radix_tree_tag_get(const struct radix_tree_root *root, 1090 unsigned long index, unsigned int tag) 1091 { 1092 struct radix_tree_node *node, *parent; 1093 unsigned long maxindex; 1094 1095 if (!root_tag_get(root, tag)) 1096 return 0; 1097 1098 radix_tree_load_root(root, &node, &maxindex); 1099 if (index > maxindex) 1100 return 0; 1101 1102 while (radix_tree_is_internal_node(node)) { 1103 unsigned offset; 1104 1105 parent = entry_to_node(node); 1106 offset = radix_tree_descend(parent, &node, index); 1107 1108 if (!tag_get(parent, tag, offset)) 1109 return 0; 1110 if (node == RADIX_TREE_RETRY) 1111 break; 1112 } 1113 1114 return 1; 1115 } 1116 EXPORT_SYMBOL(radix_tree_tag_get); 1117 1118 /* Construct iter->tags bit-mask from node->tags[tag] array */ 1119 static void set_iter_tags(struct radix_tree_iter *iter, 1120 struct radix_tree_node *node, unsigned offset, 1121 unsigned tag) 1122 { 1123 unsigned tag_long = offset / BITS_PER_LONG; 1124 unsigned tag_bit = offset % BITS_PER_LONG; 1125 1126 if (!node) { 1127 iter->tags = 1; 1128 return; 1129 } 1130 1131 iter->tags = node->tags[tag][tag_long] >> tag_bit; 1132 1133 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ 1134 if (tag_long < RADIX_TREE_TAG_LONGS - 1) { 1135 /* Pick tags from next element */ 1136 if (tag_bit) 1137 iter->tags |= node->tags[tag][tag_long + 1] << 1138 (BITS_PER_LONG - tag_bit); 1139 /* Clip chunk size, here only BITS_PER_LONG tags */ 1140 iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); 1141 } 1142 } 1143 1144 void __rcu **radix_tree_iter_resume(void __rcu **slot, 1145 struct radix_tree_iter *iter) 1146 { 1147 slot++; 1148 iter->index = __radix_tree_iter_add(iter, 1); 1149 iter->next_index = iter->index; 1150 iter->tags = 0; 1151 return NULL; 1152 } 1153 EXPORT_SYMBOL(radix_tree_iter_resume); 1154 1155 /** 1156 * radix_tree_next_chunk - find next chunk of slots for iteration 1157 * 1158 * @root: radix tree root 1159 * @iter: iterator state 1160 * @flags: RADIX_TREE_ITER_* flags and tag index 1161 * Returns: pointer to chunk first slot, or NULL if iteration is over 1162 */ 1163 void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, 1164 struct radix_tree_iter *iter, unsigned flags) 1165 { 1166 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; 1167 struct radix_tree_node *node, *child; 1168 unsigned long index, offset, maxindex; 1169 1170 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) 1171 return NULL; 1172 1173 /* 1174 * Catch next_index overflow after ~0UL. iter->index never overflows 1175 * during iterating; it can be zero only at the beginning. 1176 * And we cannot overflow iter->next_index in a single step, 1177 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 1178 * 1179 * This condition also used by radix_tree_next_slot() to stop 1180 * contiguous iterating, and forbid switching to the next chunk. 1181 */ 1182 index = iter->next_index; 1183 if (!index && iter->index) 1184 return NULL; 1185 1186 restart: 1187 radix_tree_load_root(root, &child, &maxindex); 1188 if (index > maxindex) 1189 return NULL; 1190 if (!child) 1191 return NULL; 1192 1193 if (!radix_tree_is_internal_node(child)) { 1194 /* Single-slot tree */ 1195 iter->index = index; 1196 iter->next_index = maxindex + 1; 1197 iter->tags = 1; 1198 iter->node = NULL; 1199 return (void __rcu **)&root->xa_head; 1200 } 1201 1202 do { 1203 node = entry_to_node(child); 1204 offset = radix_tree_descend(node, &child, index); 1205 1206 if ((flags & RADIX_TREE_ITER_TAGGED) ? 1207 !tag_get(node, tag, offset) : !child) { 1208 /* Hole detected */ 1209 if (flags & RADIX_TREE_ITER_CONTIG) 1210 return NULL; 1211 1212 if (flags & RADIX_TREE_ITER_TAGGED) 1213 offset = radix_tree_find_next_bit(node, tag, 1214 offset + 1); 1215 else 1216 while (++offset < RADIX_TREE_MAP_SIZE) { 1217 void *slot = rcu_dereference_raw( 1218 node->slots[offset]); 1219 if (slot) 1220 break; 1221 } 1222 index &= ~node_maxindex(node); 1223 index += offset << node->shift; 1224 /* Overflow after ~0UL */ 1225 if (!index) 1226 return NULL; 1227 if (offset == RADIX_TREE_MAP_SIZE) 1228 goto restart; 1229 child = rcu_dereference_raw(node->slots[offset]); 1230 } 1231 1232 if (!child) 1233 goto restart; 1234 if (child == RADIX_TREE_RETRY) 1235 break; 1236 } while (node->shift && radix_tree_is_internal_node(child)); 1237 1238 /* Update the iterator state */ 1239 iter->index = (index &~ node_maxindex(node)) | offset; 1240 iter->next_index = (index | node_maxindex(node)) + 1; 1241 iter->node = node; 1242 1243 if (flags & RADIX_TREE_ITER_TAGGED) 1244 set_iter_tags(iter, node, offset, tag); 1245 1246 return node->slots + offset; 1247 } 1248 EXPORT_SYMBOL(radix_tree_next_chunk); 1249 1250 /** 1251 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 1252 * @root: radix tree root 1253 * @results: where the results of the lookup are placed 1254 * @first_index: start the lookup from this key 1255 * @max_items: place up to this many items at *results 1256 * 1257 * Performs an index-ascending scan of the tree for present items. Places 1258 * them at *@results and returns the number of items which were placed at 1259 * *@results. 1260 * 1261 * The implementation is naive. 1262 * 1263 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 1264 * rcu_read_lock. In this case, rather than the returned results being 1265 * an atomic snapshot of the tree at a single point in time, the 1266 * semantics of an RCU protected gang lookup are as though multiple 1267 * radix_tree_lookups have been issued in individual locks, and results 1268 * stored in 'results'. 1269 */ 1270 unsigned int 1271 radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, 1272 unsigned long first_index, unsigned int max_items) 1273 { 1274 struct radix_tree_iter iter; 1275 void __rcu **slot; 1276 unsigned int ret = 0; 1277 1278 if (unlikely(!max_items)) 1279 return 0; 1280 1281 radix_tree_for_each_slot(slot, root, &iter, first_index) { 1282 results[ret] = rcu_dereference_raw(*slot); 1283 if (!results[ret]) 1284 continue; 1285 if (radix_tree_is_internal_node(results[ret])) { 1286 slot = radix_tree_iter_retry(&iter); 1287 continue; 1288 } 1289 if (++ret == max_items) 1290 break; 1291 } 1292 1293 return ret; 1294 } 1295 EXPORT_SYMBOL(radix_tree_gang_lookup); 1296 1297 /** 1298 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 1299 * based on a tag 1300 * @root: radix tree root 1301 * @results: where the results of the lookup are placed 1302 * @first_index: start the lookup from this key 1303 * @max_items: place up to this many items at *results 1304 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1305 * 1306 * Performs an index-ascending scan of the tree for present items which 1307 * have the tag indexed by @tag set. Places the items at *@results and 1308 * returns the number of items which were placed at *@results. 1309 */ 1310 unsigned int 1311 radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, 1312 unsigned long first_index, unsigned int max_items, 1313 unsigned int tag) 1314 { 1315 struct radix_tree_iter iter; 1316 void __rcu **slot; 1317 unsigned int ret = 0; 1318 1319 if (unlikely(!max_items)) 1320 return 0; 1321 1322 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1323 results[ret] = rcu_dereference_raw(*slot); 1324 if (!results[ret]) 1325 continue; 1326 if (radix_tree_is_internal_node(results[ret])) { 1327 slot = radix_tree_iter_retry(&iter); 1328 continue; 1329 } 1330 if (++ret == max_items) 1331 break; 1332 } 1333 1334 return ret; 1335 } 1336 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 1337 1338 /** 1339 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 1340 * radix tree based on a tag 1341 * @root: radix tree root 1342 * @results: where the results of the lookup are placed 1343 * @first_index: start the lookup from this key 1344 * @max_items: place up to this many items at *results 1345 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1346 * 1347 * Performs an index-ascending scan of the tree for present items which 1348 * have the tag indexed by @tag set. Places the slots at *@results and 1349 * returns the number of slots which were placed at *@results. 1350 */ 1351 unsigned int 1352 radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, 1353 void __rcu ***results, unsigned long first_index, 1354 unsigned int max_items, unsigned int tag) 1355 { 1356 struct radix_tree_iter iter; 1357 void __rcu **slot; 1358 unsigned int ret = 0; 1359 1360 if (unlikely(!max_items)) 1361 return 0; 1362 1363 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1364 results[ret] = slot; 1365 if (++ret == max_items) 1366 break; 1367 } 1368 1369 return ret; 1370 } 1371 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1372 1373 static bool __radix_tree_delete(struct radix_tree_root *root, 1374 struct radix_tree_node *node, void __rcu **slot) 1375 { 1376 void *old = rcu_dereference_raw(*slot); 1377 int values = xa_is_value(old) ? -1 : 0; 1378 unsigned offset = get_slot_offset(node, slot); 1379 int tag; 1380 1381 if (is_idr(root)) 1382 node_tag_set(root, node, IDR_FREE, offset); 1383 else 1384 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1385 node_tag_clear(root, node, tag, offset); 1386 1387 replace_slot(slot, NULL, node, -1, values); 1388 return node && delete_node(root, node); 1389 } 1390 1391 /** 1392 * radix_tree_iter_delete - delete the entry at this iterator position 1393 * @root: radix tree root 1394 * @iter: iterator state 1395 * @slot: pointer to slot 1396 * 1397 * Delete the entry at the position currently pointed to by the iterator. 1398 * This may result in the current node being freed; if it is, the iterator 1399 * is advanced so that it will not reference the freed memory. This 1400 * function may be called without any locking if there are no other threads 1401 * which can access this tree. 1402 */ 1403 void radix_tree_iter_delete(struct radix_tree_root *root, 1404 struct radix_tree_iter *iter, void __rcu **slot) 1405 { 1406 if (__radix_tree_delete(root, iter->node, slot)) 1407 iter->index = iter->next_index; 1408 } 1409 EXPORT_SYMBOL(radix_tree_iter_delete); 1410 1411 /** 1412 * radix_tree_delete_item - delete an item from a radix tree 1413 * @root: radix tree root 1414 * @index: index key 1415 * @item: expected item 1416 * 1417 * Remove @item at @index from the radix tree rooted at @root. 1418 * 1419 * Return: the deleted entry, or %NULL if it was not present 1420 * or the entry at the given @index was not @item. 1421 */ 1422 void *radix_tree_delete_item(struct radix_tree_root *root, 1423 unsigned long index, void *item) 1424 { 1425 struct radix_tree_node *node = NULL; 1426 void __rcu **slot = NULL; 1427 void *entry; 1428 1429 entry = __radix_tree_lookup(root, index, &node, &slot); 1430 if (!slot) 1431 return NULL; 1432 if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, 1433 get_slot_offset(node, slot)))) 1434 return NULL; 1435 1436 if (item && entry != item) 1437 return NULL; 1438 1439 __radix_tree_delete(root, node, slot); 1440 1441 return entry; 1442 } 1443 EXPORT_SYMBOL(radix_tree_delete_item); 1444 1445 /** 1446 * radix_tree_delete - delete an entry from a radix tree 1447 * @root: radix tree root 1448 * @index: index key 1449 * 1450 * Remove the entry at @index from the radix tree rooted at @root. 1451 * 1452 * Return: The deleted entry, or %NULL if it was not present. 1453 */ 1454 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1455 { 1456 return radix_tree_delete_item(root, index, NULL); 1457 } 1458 EXPORT_SYMBOL(radix_tree_delete); 1459 1460 /** 1461 * radix_tree_tagged - test whether any items in the tree are tagged 1462 * @root: radix tree root 1463 * @tag: tag to test 1464 */ 1465 int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) 1466 { 1467 return root_tag_get(root, tag); 1468 } 1469 EXPORT_SYMBOL(radix_tree_tagged); 1470 1471 /** 1472 * idr_preload - preload for idr_alloc() 1473 * @gfp_mask: allocation mask to use for preloading 1474 * 1475 * Preallocate memory to use for the next call to idr_alloc(). This function 1476 * returns with preemption disabled. It will be enabled by idr_preload_end(). 1477 */ 1478 void idr_preload(gfp_t gfp_mask) 1479 { 1480 if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) 1481 preempt_disable(); 1482 } 1483 EXPORT_SYMBOL(idr_preload); 1484 1485 void __rcu **idr_get_free(struct radix_tree_root *root, 1486 struct radix_tree_iter *iter, gfp_t gfp, 1487 unsigned long max) 1488 { 1489 struct radix_tree_node *node = NULL, *child; 1490 void __rcu **slot = (void __rcu **)&root->xa_head; 1491 unsigned long maxindex, start = iter->next_index; 1492 unsigned int shift, offset = 0; 1493 1494 grow: 1495 shift = radix_tree_load_root(root, &child, &maxindex); 1496 if (!radix_tree_tagged(root, IDR_FREE)) 1497 start = max(start, maxindex + 1); 1498 if (start > max) 1499 return ERR_PTR(-ENOSPC); 1500 1501 if (start > maxindex) { 1502 int error = radix_tree_extend(root, gfp, start, shift); 1503 if (error < 0) 1504 return ERR_PTR(error); 1505 shift = error; 1506 child = rcu_dereference_raw(root->xa_head); 1507 } 1508 if (start == 0 && shift == 0) 1509 shift = RADIX_TREE_MAP_SHIFT; 1510 1511 while (shift) { 1512 shift -= RADIX_TREE_MAP_SHIFT; 1513 if (child == NULL) { 1514 /* Have to add a child node. */ 1515 child = radix_tree_node_alloc(gfp, node, root, shift, 1516 offset, 0, 0); 1517 if (!child) 1518 return ERR_PTR(-ENOMEM); 1519 all_tag_set(child, IDR_FREE); 1520 rcu_assign_pointer(*slot, node_to_entry(child)); 1521 if (node) 1522 node->count++; 1523 } else if (!radix_tree_is_internal_node(child)) 1524 break; 1525 1526 node = entry_to_node(child); 1527 offset = radix_tree_descend(node, &child, start); 1528 if (!tag_get(node, IDR_FREE, offset)) { 1529 offset = radix_tree_find_next_bit(node, IDR_FREE, 1530 offset + 1); 1531 start = next_index(start, node, offset); 1532 if (start > max) 1533 return ERR_PTR(-ENOSPC); 1534 while (offset == RADIX_TREE_MAP_SIZE) { 1535 offset = node->offset + 1; 1536 node = node->parent; 1537 if (!node) 1538 goto grow; 1539 shift = node->shift; 1540 } 1541 child = rcu_dereference_raw(node->slots[offset]); 1542 } 1543 slot = &node->slots[offset]; 1544 } 1545 1546 iter->index = start; 1547 if (node) 1548 iter->next_index = 1 + min(max, (start | node_maxindex(node))); 1549 else 1550 iter->next_index = 1; 1551 iter->node = node; 1552 set_iter_tags(iter, node, offset, IDR_FREE); 1553 1554 return slot; 1555 } 1556 1557 /** 1558 * idr_destroy - release all internal memory from an IDR 1559 * @idr: idr handle 1560 * 1561 * After this function is called, the IDR is empty, and may be reused or 1562 * the data structure containing it may be freed. 1563 * 1564 * A typical clean-up sequence for objects stored in an idr tree will use 1565 * idr_for_each() to free all objects, if necessary, then idr_destroy() to 1566 * free the memory used to keep track of those objects. 1567 */ 1568 void idr_destroy(struct idr *idr) 1569 { 1570 struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); 1571 if (radix_tree_is_internal_node(node)) 1572 radix_tree_free_nodes(node); 1573 idr->idr_rt.xa_head = NULL; 1574 root_tag_set(&idr->idr_rt, IDR_FREE); 1575 } 1576 EXPORT_SYMBOL(idr_destroy); 1577 1578 static void 1579 radix_tree_node_ctor(void *arg) 1580 { 1581 struct radix_tree_node *node = arg; 1582 1583 memset(node, 0, sizeof(*node)); 1584 INIT_LIST_HEAD(&node->private_list); 1585 } 1586 1587 static int radix_tree_cpu_dead(unsigned int cpu) 1588 { 1589 struct radix_tree_preload *rtp; 1590 struct radix_tree_node *node; 1591 1592 /* Free per-cpu pool of preloaded nodes */ 1593 rtp = &per_cpu(radix_tree_preloads, cpu); 1594 while (rtp->nr) { 1595 node = rtp->nodes; 1596 rtp->nodes = node->parent; 1597 kmem_cache_free(radix_tree_node_cachep, node); 1598 rtp->nr--; 1599 } 1600 return 0; 1601 } 1602 1603 void __init radix_tree_init(void) 1604 { 1605 int ret; 1606 1607 BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); 1608 BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); 1609 BUILD_BUG_ON(XA_CHUNK_SIZE > 255); 1610 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1611 sizeof(struct radix_tree_node), 0, 1612 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1613 radix_tree_node_ctor); 1614 ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", 1615 NULL, radix_tree_cpu_dead); 1616 WARN_ON(ret < 0); 1617 } 1618