1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2001 Momchil Velikov 4 * Portions Copyright (C) 2001 Christoph Hellwig 5 * Copyright (C) 2005 SGI, Christoph Lameter 6 * Copyright (C) 2006 Nick Piggin 7 * Copyright (C) 2012 Konstantin Khlebnikov 8 * Copyright (C) 2016 Intel, Matthew Wilcox 9 * Copyright (C) 2016 Intel, Ross Zwisler 10 */ 11 12 #include <linux/bitmap.h> 13 #include <linux/bitops.h> 14 #include <linux/bug.h> 15 #include <linux/cpu.h> 16 #include <linux/errno.h> 17 #include <linux/export.h> 18 #include <linux/idr.h> 19 #include <linux/init.h> 20 #include <linux/kernel.h> 21 #include <linux/kmemleak.h> 22 #include <linux/percpu.h> 23 #include <linux/local_lock.h> 24 #include <linux/preempt.h> /* in_interrupt() */ 25 #include <linux/radix-tree.h> 26 #include <linux/rcupdate.h> 27 #include <linux/slab.h> 28 #include <linux/string.h> 29 #include <linux/xarray.h> 30 31 /* 32 * Radix tree node cache. 33 */ 34 struct kmem_cache *radix_tree_node_cachep; 35 36 /* 37 * The radix tree is variable-height, so an insert operation not only has 38 * to build the branch to its corresponding item, it also has to build the 39 * branch to existing items if the size has to be increased (by 40 * radix_tree_extend). 41 * 42 * The worst case is a zero height tree with just a single item at index 0, 43 * and then inserting an item at index ULONG_MAX. This requires 2 new branches 44 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. 45 * Hence: 46 */ 47 #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) 48 49 /* 50 * The IDR does not have to be as high as the radix tree since it uses 51 * signed integers, not unsigned longs. 52 */ 53 #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) 54 #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ 55 RADIX_TREE_MAP_SHIFT)) 56 #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) 57 58 /* 59 * Per-cpu pool of preloaded nodes 60 */ 61 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 62 .lock = INIT_LOCAL_LOCK(lock), 63 }; 64 EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads); 65 66 static inline struct radix_tree_node *entry_to_node(void *ptr) 67 { 68 return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); 69 } 70 71 static inline void *node_to_entry(void *ptr) 72 { 73 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 74 } 75 76 #define RADIX_TREE_RETRY XA_RETRY_ENTRY 77 78 static inline unsigned long 79 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) 80 { 81 return parent ? slot - parent->slots : 0; 82 } 83 84 static unsigned int radix_tree_descend(const struct radix_tree_node *parent, 85 struct radix_tree_node **nodep, unsigned long index) 86 { 87 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; 88 void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); 89 90 *nodep = (void *)entry; 91 return offset; 92 } 93 94 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) 95 { 96 return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK); 97 } 98 99 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 100 int offset) 101 { 102 __set_bit(offset, node->tags[tag]); 103 } 104 105 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 106 int offset) 107 { 108 __clear_bit(offset, node->tags[tag]); 109 } 110 111 static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, 112 int offset) 113 { 114 return test_bit(offset, node->tags[tag]); 115 } 116 117 static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) 118 { 119 root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); 120 } 121 122 static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) 123 { 124 root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); 125 } 126 127 static inline void root_tag_clear_all(struct radix_tree_root *root) 128 { 129 root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1); 130 } 131 132 static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) 133 { 134 return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT)); 135 } 136 137 static inline unsigned root_tags_get(const struct radix_tree_root *root) 138 { 139 return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT; 140 } 141 142 static inline bool is_idr(const struct radix_tree_root *root) 143 { 144 return !!(root->xa_flags & ROOT_IS_IDR); 145 } 146 147 /* 148 * Returns 1 if any slot in the node has this tag set. 149 * Otherwise returns 0. 150 */ 151 static inline int any_tag_set(const struct radix_tree_node *node, 152 unsigned int tag) 153 { 154 unsigned idx; 155 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 156 if (node->tags[tag][idx]) 157 return 1; 158 } 159 return 0; 160 } 161 162 static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) 163 { 164 bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); 165 } 166 167 /** 168 * radix_tree_find_next_bit - find the next set bit in a memory region 169 * 170 * @addr: The address to base the search on 171 * @size: The bitmap size in bits 172 * @offset: The bitnumber to start searching at 173 * 174 * Unrollable variant of find_next_bit() for constant size arrays. 175 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. 176 * Returns next bit offset, or size if nothing found. 177 */ 178 static __always_inline unsigned long 179 radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, 180 unsigned long offset) 181 { 182 const unsigned long *addr = node->tags[tag]; 183 184 if (offset < RADIX_TREE_MAP_SIZE) { 185 unsigned long tmp; 186 187 addr += offset / BITS_PER_LONG; 188 tmp = *addr >> (offset % BITS_PER_LONG); 189 if (tmp) 190 return __ffs(tmp) + offset; 191 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); 192 while (offset < RADIX_TREE_MAP_SIZE) { 193 tmp = *++addr; 194 if (tmp) 195 return __ffs(tmp) + offset; 196 offset += BITS_PER_LONG; 197 } 198 } 199 return RADIX_TREE_MAP_SIZE; 200 } 201 202 static unsigned int iter_offset(const struct radix_tree_iter *iter) 203 { 204 return iter->index & RADIX_TREE_MAP_MASK; 205 } 206 207 /* 208 * The maximum index which can be stored in a radix tree 209 */ 210 static inline unsigned long shift_maxindex(unsigned int shift) 211 { 212 return (RADIX_TREE_MAP_SIZE << shift) - 1; 213 } 214 215 static inline unsigned long node_maxindex(const struct radix_tree_node *node) 216 { 217 return shift_maxindex(node->shift); 218 } 219 220 static unsigned long next_index(unsigned long index, 221 const struct radix_tree_node *node, 222 unsigned long offset) 223 { 224 return (index & ~node_maxindex(node)) + (offset << node->shift); 225 } 226 227 /* 228 * This assumes that the caller has performed appropriate preallocation, and 229 * that the caller has pinned this thread of control to the current CPU. 230 */ 231 static struct radix_tree_node * 232 radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, 233 struct radix_tree_root *root, 234 unsigned int shift, unsigned int offset, 235 unsigned int count, unsigned int nr_values) 236 { 237 struct radix_tree_node *ret = NULL; 238 239 /* 240 * Preload code isn't irq safe and it doesn't make sense to use 241 * preloading during an interrupt anyway as all the allocations have 242 * to be atomic. So just do normal allocation when in interrupt. 243 */ 244 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { 245 struct radix_tree_preload *rtp; 246 247 /* 248 * Even if the caller has preloaded, try to allocate from the 249 * cache first for the new node to get accounted to the memory 250 * cgroup. 251 */ 252 ret = kmem_cache_alloc(radix_tree_node_cachep, 253 gfp_mask | __GFP_NOWARN); 254 if (ret) 255 goto out; 256 257 /* 258 * Provided the caller has preloaded here, we will always 259 * succeed in getting a node here (and never reach 260 * kmem_cache_alloc) 261 */ 262 rtp = this_cpu_ptr(&radix_tree_preloads); 263 if (rtp->nr) { 264 ret = rtp->nodes; 265 rtp->nodes = ret->parent; 266 rtp->nr--; 267 } 268 /* 269 * Update the allocation stack trace as this is more useful 270 * for debugging. 271 */ 272 kmemleak_update_trace(ret); 273 goto out; 274 } 275 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 276 out: 277 BUG_ON(radix_tree_is_internal_node(ret)); 278 if (ret) { 279 ret->shift = shift; 280 ret->offset = offset; 281 ret->count = count; 282 ret->nr_values = nr_values; 283 ret->parent = parent; 284 ret->array = root; 285 } 286 return ret; 287 } 288 289 void radix_tree_node_rcu_free(struct rcu_head *head) 290 { 291 struct radix_tree_node *node = 292 container_of(head, struct radix_tree_node, rcu_head); 293 294 /* 295 * Must only free zeroed nodes into the slab. We can be left with 296 * non-NULL entries by radix_tree_free_nodes, so clear the entries 297 * and tags here. 298 */ 299 memset(node->slots, 0, sizeof(node->slots)); 300 memset(node->tags, 0, sizeof(node->tags)); 301 INIT_LIST_HEAD(&node->private_list); 302 303 kmem_cache_free(radix_tree_node_cachep, node); 304 } 305 306 static inline void 307 radix_tree_node_free(struct radix_tree_node *node) 308 { 309 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 310 } 311 312 /* 313 * Load up this CPU's radix_tree_node buffer with sufficient objects to 314 * ensure that the addition of a single element in the tree cannot fail. On 315 * success, return zero, with preemption disabled. On error, return -ENOMEM 316 * with preemption not disabled. 317 * 318 * To make use of this facility, the radix tree must be initialised without 319 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 320 */ 321 static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) 322 { 323 struct radix_tree_preload *rtp; 324 struct radix_tree_node *node; 325 int ret = -ENOMEM; 326 327 /* 328 * Nodes preloaded by one cgroup can be be used by another cgroup, so 329 * they should never be accounted to any particular memory cgroup. 330 */ 331 gfp_mask &= ~__GFP_ACCOUNT; 332 333 local_lock(&radix_tree_preloads.lock); 334 rtp = this_cpu_ptr(&radix_tree_preloads); 335 while (rtp->nr < nr) { 336 local_unlock(&radix_tree_preloads.lock); 337 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 338 if (node == NULL) 339 goto out; 340 local_lock(&radix_tree_preloads.lock); 341 rtp = this_cpu_ptr(&radix_tree_preloads); 342 if (rtp->nr < nr) { 343 node->parent = rtp->nodes; 344 rtp->nodes = node; 345 rtp->nr++; 346 } else { 347 kmem_cache_free(radix_tree_node_cachep, node); 348 } 349 } 350 ret = 0; 351 out: 352 return ret; 353 } 354 355 /* 356 * Load up this CPU's radix_tree_node buffer with sufficient objects to 357 * ensure that the addition of a single element in the tree cannot fail. On 358 * success, return zero, with preemption disabled. On error, return -ENOMEM 359 * with preemption not disabled. 360 * 361 * To make use of this facility, the radix tree must be initialised without 362 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 363 */ 364 int radix_tree_preload(gfp_t gfp_mask) 365 { 366 /* Warn on non-sensical use... */ 367 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); 368 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 369 } 370 EXPORT_SYMBOL(radix_tree_preload); 371 372 /* 373 * The same as above function, except we don't guarantee preloading happens. 374 * We do it, if we decide it helps. On success, return zero with preemption 375 * disabled. On error, return -ENOMEM with preemption not disabled. 376 */ 377 int radix_tree_maybe_preload(gfp_t gfp_mask) 378 { 379 if (gfpflags_allow_blocking(gfp_mask)) 380 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 381 /* Preloading doesn't help anything with this gfp mask, skip it */ 382 local_lock(&radix_tree_preloads.lock); 383 return 0; 384 } 385 EXPORT_SYMBOL(radix_tree_maybe_preload); 386 387 static unsigned radix_tree_load_root(const struct radix_tree_root *root, 388 struct radix_tree_node **nodep, unsigned long *maxindex) 389 { 390 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 391 392 *nodep = node; 393 394 if (likely(radix_tree_is_internal_node(node))) { 395 node = entry_to_node(node); 396 *maxindex = node_maxindex(node); 397 return node->shift + RADIX_TREE_MAP_SHIFT; 398 } 399 400 *maxindex = 0; 401 return 0; 402 } 403 404 /* 405 * Extend a radix tree so it can store key @index. 406 */ 407 static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, 408 unsigned long index, unsigned int shift) 409 { 410 void *entry; 411 unsigned int maxshift; 412 int tag; 413 414 /* Figure out what the shift should be. */ 415 maxshift = shift; 416 while (index > shift_maxindex(maxshift)) 417 maxshift += RADIX_TREE_MAP_SHIFT; 418 419 entry = rcu_dereference_raw(root->xa_head); 420 if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) 421 goto out; 422 423 do { 424 struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, 425 root, shift, 0, 1, 0); 426 if (!node) 427 return -ENOMEM; 428 429 if (is_idr(root)) { 430 all_tag_set(node, IDR_FREE); 431 if (!root_tag_get(root, IDR_FREE)) { 432 tag_clear(node, IDR_FREE, 0); 433 root_tag_set(root, IDR_FREE); 434 } 435 } else { 436 /* Propagate the aggregated tag info to the new child */ 437 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 438 if (root_tag_get(root, tag)) 439 tag_set(node, tag, 0); 440 } 441 } 442 443 BUG_ON(shift > BITS_PER_LONG); 444 if (radix_tree_is_internal_node(entry)) { 445 entry_to_node(entry)->parent = node; 446 } else if (xa_is_value(entry)) { 447 /* Moving a value entry root->xa_head to a node */ 448 node->nr_values = 1; 449 } 450 /* 451 * entry was already in the radix tree, so we do not need 452 * rcu_assign_pointer here 453 */ 454 node->slots[0] = (void __rcu *)entry; 455 entry = node_to_entry(node); 456 rcu_assign_pointer(root->xa_head, entry); 457 shift += RADIX_TREE_MAP_SHIFT; 458 } while (shift <= maxshift); 459 out: 460 return maxshift + RADIX_TREE_MAP_SHIFT; 461 } 462 463 /** 464 * radix_tree_shrink - shrink radix tree to minimum height 465 * @root radix tree root 466 */ 467 static inline bool radix_tree_shrink(struct radix_tree_root *root) 468 { 469 bool shrunk = false; 470 471 for (;;) { 472 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 473 struct radix_tree_node *child; 474 475 if (!radix_tree_is_internal_node(node)) 476 break; 477 node = entry_to_node(node); 478 479 /* 480 * The candidate node has more than one child, or its child 481 * is not at the leftmost slot, we cannot shrink. 482 */ 483 if (node->count != 1) 484 break; 485 child = rcu_dereference_raw(node->slots[0]); 486 if (!child) 487 break; 488 489 /* 490 * For an IDR, we must not shrink entry 0 into the root in 491 * case somebody calls idr_replace() with a pointer that 492 * appears to be an internal entry 493 */ 494 if (!node->shift && is_idr(root)) 495 break; 496 497 if (radix_tree_is_internal_node(child)) 498 entry_to_node(child)->parent = NULL; 499 500 /* 501 * We don't need rcu_assign_pointer(), since we are simply 502 * moving the node from one part of the tree to another: if it 503 * was safe to dereference the old pointer to it 504 * (node->slots[0]), it will be safe to dereference the new 505 * one (root->xa_head) as far as dependent read barriers go. 506 */ 507 root->xa_head = (void __rcu *)child; 508 if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) 509 root_tag_clear(root, IDR_FREE); 510 511 /* 512 * We have a dilemma here. The node's slot[0] must not be 513 * NULLed in case there are concurrent lookups expecting to 514 * find the item. However if this was a bottom-level node, 515 * then it may be subject to the slot pointer being visible 516 * to callers dereferencing it. If item corresponding to 517 * slot[0] is subsequently deleted, these callers would expect 518 * their slot to become empty sooner or later. 519 * 520 * For example, lockless pagecache will look up a slot, deref 521 * the page pointer, and if the page has 0 refcount it means it 522 * was concurrently deleted from pagecache so try the deref 523 * again. Fortunately there is already a requirement for logic 524 * to retry the entire slot lookup -- the indirect pointer 525 * problem (replacing direct root node with an indirect pointer 526 * also results in a stale slot). So tag the slot as indirect 527 * to force callers to retry. 528 */ 529 node->count = 0; 530 if (!radix_tree_is_internal_node(child)) { 531 node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; 532 } 533 534 WARN_ON_ONCE(!list_empty(&node->private_list)); 535 radix_tree_node_free(node); 536 shrunk = true; 537 } 538 539 return shrunk; 540 } 541 542 static bool delete_node(struct radix_tree_root *root, 543 struct radix_tree_node *node) 544 { 545 bool deleted = false; 546 547 do { 548 struct radix_tree_node *parent; 549 550 if (node->count) { 551 if (node_to_entry(node) == 552 rcu_dereference_raw(root->xa_head)) 553 deleted |= radix_tree_shrink(root); 554 return deleted; 555 } 556 557 parent = node->parent; 558 if (parent) { 559 parent->slots[node->offset] = NULL; 560 parent->count--; 561 } else { 562 /* 563 * Shouldn't the tags already have all been cleared 564 * by the caller? 565 */ 566 if (!is_idr(root)) 567 root_tag_clear_all(root); 568 root->xa_head = NULL; 569 } 570 571 WARN_ON_ONCE(!list_empty(&node->private_list)); 572 radix_tree_node_free(node); 573 deleted = true; 574 575 node = parent; 576 } while (node); 577 578 return deleted; 579 } 580 581 /** 582 * __radix_tree_create - create a slot in a radix tree 583 * @root: radix tree root 584 * @index: index key 585 * @nodep: returns node 586 * @slotp: returns slot 587 * 588 * Create, if necessary, and return the node and slot for an item 589 * at position @index in the radix tree @root. 590 * 591 * Until there is more than one item in the tree, no nodes are 592 * allocated and @root->xa_head is used as a direct slot instead of 593 * pointing to a node, in which case *@nodep will be NULL. 594 * 595 * Returns -ENOMEM, or 0 for success. 596 */ 597 static int __radix_tree_create(struct radix_tree_root *root, 598 unsigned long index, struct radix_tree_node **nodep, 599 void __rcu ***slotp) 600 { 601 struct radix_tree_node *node = NULL, *child; 602 void __rcu **slot = (void __rcu **)&root->xa_head; 603 unsigned long maxindex; 604 unsigned int shift, offset = 0; 605 unsigned long max = index; 606 gfp_t gfp = root_gfp_mask(root); 607 608 shift = radix_tree_load_root(root, &child, &maxindex); 609 610 /* Make sure the tree is high enough. */ 611 if (max > maxindex) { 612 int error = radix_tree_extend(root, gfp, max, shift); 613 if (error < 0) 614 return error; 615 shift = error; 616 child = rcu_dereference_raw(root->xa_head); 617 } 618 619 while (shift > 0) { 620 shift -= RADIX_TREE_MAP_SHIFT; 621 if (child == NULL) { 622 /* Have to add a child node. */ 623 child = radix_tree_node_alloc(gfp, node, root, shift, 624 offset, 0, 0); 625 if (!child) 626 return -ENOMEM; 627 rcu_assign_pointer(*slot, node_to_entry(child)); 628 if (node) 629 node->count++; 630 } else if (!radix_tree_is_internal_node(child)) 631 break; 632 633 /* Go a level down */ 634 node = entry_to_node(child); 635 offset = radix_tree_descend(node, &child, index); 636 slot = &node->slots[offset]; 637 } 638 639 if (nodep) 640 *nodep = node; 641 if (slotp) 642 *slotp = slot; 643 return 0; 644 } 645 646 /* 647 * Free any nodes below this node. The tree is presumed to not need 648 * shrinking, and any user data in the tree is presumed to not need a 649 * destructor called on it. If we need to add a destructor, we can 650 * add that functionality later. Note that we may not clear tags or 651 * slots from the tree as an RCU walker may still have a pointer into 652 * this subtree. We could replace the entries with RADIX_TREE_RETRY, 653 * but we'll still have to clear those in rcu_free. 654 */ 655 static void radix_tree_free_nodes(struct radix_tree_node *node) 656 { 657 unsigned offset = 0; 658 struct radix_tree_node *child = entry_to_node(node); 659 660 for (;;) { 661 void *entry = rcu_dereference_raw(child->slots[offset]); 662 if (xa_is_node(entry) && child->shift) { 663 child = entry_to_node(entry); 664 offset = 0; 665 continue; 666 } 667 offset++; 668 while (offset == RADIX_TREE_MAP_SIZE) { 669 struct radix_tree_node *old = child; 670 offset = child->offset + 1; 671 child = child->parent; 672 WARN_ON_ONCE(!list_empty(&old->private_list)); 673 radix_tree_node_free(old); 674 if (old == entry_to_node(node)) 675 return; 676 } 677 } 678 } 679 680 static inline int insert_entries(struct radix_tree_node *node, 681 void __rcu **slot, void *item, bool replace) 682 { 683 if (*slot) 684 return -EEXIST; 685 rcu_assign_pointer(*slot, item); 686 if (node) { 687 node->count++; 688 if (xa_is_value(item)) 689 node->nr_values++; 690 } 691 return 1; 692 } 693 694 /** 695 * __radix_tree_insert - insert into a radix tree 696 * @root: radix tree root 697 * @index: index key 698 * @item: item to insert 699 * 700 * Insert an item into the radix tree at position @index. 701 */ 702 int radix_tree_insert(struct radix_tree_root *root, unsigned long index, 703 void *item) 704 { 705 struct radix_tree_node *node; 706 void __rcu **slot; 707 int error; 708 709 BUG_ON(radix_tree_is_internal_node(item)); 710 711 error = __radix_tree_create(root, index, &node, &slot); 712 if (error) 713 return error; 714 715 error = insert_entries(node, slot, item, false); 716 if (error < 0) 717 return error; 718 719 if (node) { 720 unsigned offset = get_slot_offset(node, slot); 721 BUG_ON(tag_get(node, 0, offset)); 722 BUG_ON(tag_get(node, 1, offset)); 723 BUG_ON(tag_get(node, 2, offset)); 724 } else { 725 BUG_ON(root_tags_get(root)); 726 } 727 728 return 0; 729 } 730 EXPORT_SYMBOL(radix_tree_insert); 731 732 /** 733 * __radix_tree_lookup - lookup an item in a radix tree 734 * @root: radix tree root 735 * @index: index key 736 * @nodep: returns node 737 * @slotp: returns slot 738 * 739 * Lookup and return the item at position @index in the radix 740 * tree @root. 741 * 742 * Until there is more than one item in the tree, no nodes are 743 * allocated and @root->xa_head is used as a direct slot instead of 744 * pointing to a node, in which case *@nodep will be NULL. 745 */ 746 void *__radix_tree_lookup(const struct radix_tree_root *root, 747 unsigned long index, struct radix_tree_node **nodep, 748 void __rcu ***slotp) 749 { 750 struct radix_tree_node *node, *parent; 751 unsigned long maxindex; 752 void __rcu **slot; 753 754 restart: 755 parent = NULL; 756 slot = (void __rcu **)&root->xa_head; 757 radix_tree_load_root(root, &node, &maxindex); 758 if (index > maxindex) 759 return NULL; 760 761 while (radix_tree_is_internal_node(node)) { 762 unsigned offset; 763 764 parent = entry_to_node(node); 765 offset = radix_tree_descend(parent, &node, index); 766 slot = parent->slots + offset; 767 if (node == RADIX_TREE_RETRY) 768 goto restart; 769 if (parent->shift == 0) 770 break; 771 } 772 773 if (nodep) 774 *nodep = parent; 775 if (slotp) 776 *slotp = slot; 777 return node; 778 } 779 780 /** 781 * radix_tree_lookup_slot - lookup a slot in a radix tree 782 * @root: radix tree root 783 * @index: index key 784 * 785 * Returns: the slot corresponding to the position @index in the 786 * radix tree @root. This is useful for update-if-exists operations. 787 * 788 * This function can be called under rcu_read_lock iff the slot is not 789 * modified by radix_tree_replace_slot, otherwise it must be called 790 * exclusive from other writers. Any dereference of the slot must be done 791 * using radix_tree_deref_slot. 792 */ 793 void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, 794 unsigned long index) 795 { 796 void __rcu **slot; 797 798 if (!__radix_tree_lookup(root, index, NULL, &slot)) 799 return NULL; 800 return slot; 801 } 802 EXPORT_SYMBOL(radix_tree_lookup_slot); 803 804 /** 805 * radix_tree_lookup - perform lookup operation on a radix tree 806 * @root: radix tree root 807 * @index: index key 808 * 809 * Lookup the item at the position @index in the radix tree @root. 810 * 811 * This function can be called under rcu_read_lock, however the caller 812 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 813 * them safely). No RCU barriers are required to access or modify the 814 * returned item, however. 815 */ 816 void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) 817 { 818 return __radix_tree_lookup(root, index, NULL, NULL); 819 } 820 EXPORT_SYMBOL(radix_tree_lookup); 821 822 static void replace_slot(void __rcu **slot, void *item, 823 struct radix_tree_node *node, int count, int values) 824 { 825 if (node && (count || values)) { 826 node->count += count; 827 node->nr_values += values; 828 } 829 830 rcu_assign_pointer(*slot, item); 831 } 832 833 static bool node_tag_get(const struct radix_tree_root *root, 834 const struct radix_tree_node *node, 835 unsigned int tag, unsigned int offset) 836 { 837 if (node) 838 return tag_get(node, tag, offset); 839 return root_tag_get(root, tag); 840 } 841 842 /* 843 * IDR users want to be able to store NULL in the tree, so if the slot isn't 844 * free, don't adjust the count, even if it's transitioning between NULL and 845 * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still 846 * have empty bits, but it only stores NULL in slots when they're being 847 * deleted. 848 */ 849 static int calculate_count(struct radix_tree_root *root, 850 struct radix_tree_node *node, void __rcu **slot, 851 void *item, void *old) 852 { 853 if (is_idr(root)) { 854 unsigned offset = get_slot_offset(node, slot); 855 bool free = node_tag_get(root, node, IDR_FREE, offset); 856 if (!free) 857 return 0; 858 if (!old) 859 return 1; 860 } 861 return !!item - !!old; 862 } 863 864 /** 865 * __radix_tree_replace - replace item in a slot 866 * @root: radix tree root 867 * @node: pointer to tree node 868 * @slot: pointer to slot in @node 869 * @item: new item to store in the slot. 870 * 871 * For use with __radix_tree_lookup(). Caller must hold tree write locked 872 * across slot lookup and replacement. 873 */ 874 void __radix_tree_replace(struct radix_tree_root *root, 875 struct radix_tree_node *node, 876 void __rcu **slot, void *item) 877 { 878 void *old = rcu_dereference_raw(*slot); 879 int values = !!xa_is_value(item) - !!xa_is_value(old); 880 int count = calculate_count(root, node, slot, item, old); 881 882 /* 883 * This function supports replacing value entries and 884 * deleting entries, but that needs accounting against the 885 * node unless the slot is root->xa_head. 886 */ 887 WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && 888 (count || values)); 889 replace_slot(slot, item, node, count, values); 890 891 if (!node) 892 return; 893 894 delete_node(root, node); 895 } 896 897 /** 898 * radix_tree_replace_slot - replace item in a slot 899 * @root: radix tree root 900 * @slot: pointer to slot 901 * @item: new item to store in the slot. 902 * 903 * For use with radix_tree_lookup_slot() and 904 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked 905 * across slot lookup and replacement. 906 * 907 * NOTE: This cannot be used to switch between non-entries (empty slots), 908 * regular entries, and value entries, as that requires accounting 909 * inside the radix tree node. When switching from one type of entry or 910 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or 911 * radix_tree_iter_replace(). 912 */ 913 void radix_tree_replace_slot(struct radix_tree_root *root, 914 void __rcu **slot, void *item) 915 { 916 __radix_tree_replace(root, NULL, slot, item); 917 } 918 EXPORT_SYMBOL(radix_tree_replace_slot); 919 920 /** 921 * radix_tree_iter_replace - replace item in a slot 922 * @root: radix tree root 923 * @slot: pointer to slot 924 * @item: new item to store in the slot. 925 * 926 * For use with radix_tree_for_each_slot(). 927 * Caller must hold tree write locked. 928 */ 929 void radix_tree_iter_replace(struct radix_tree_root *root, 930 const struct radix_tree_iter *iter, 931 void __rcu **slot, void *item) 932 { 933 __radix_tree_replace(root, iter->node, slot, item); 934 } 935 936 static void node_tag_set(struct radix_tree_root *root, 937 struct radix_tree_node *node, 938 unsigned int tag, unsigned int offset) 939 { 940 while (node) { 941 if (tag_get(node, tag, offset)) 942 return; 943 tag_set(node, tag, offset); 944 offset = node->offset; 945 node = node->parent; 946 } 947 948 if (!root_tag_get(root, tag)) 949 root_tag_set(root, tag); 950 } 951 952 /** 953 * radix_tree_tag_set - set a tag on a radix tree node 954 * @root: radix tree root 955 * @index: index key 956 * @tag: tag index 957 * 958 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 959 * corresponding to @index in the radix tree. From 960 * the root all the way down to the leaf node. 961 * 962 * Returns the address of the tagged item. Setting a tag on a not-present 963 * item is a bug. 964 */ 965 void *radix_tree_tag_set(struct radix_tree_root *root, 966 unsigned long index, unsigned int tag) 967 { 968 struct radix_tree_node *node, *parent; 969 unsigned long maxindex; 970 971 radix_tree_load_root(root, &node, &maxindex); 972 BUG_ON(index > maxindex); 973 974 while (radix_tree_is_internal_node(node)) { 975 unsigned offset; 976 977 parent = entry_to_node(node); 978 offset = radix_tree_descend(parent, &node, index); 979 BUG_ON(!node); 980 981 if (!tag_get(parent, tag, offset)) 982 tag_set(parent, tag, offset); 983 } 984 985 /* set the root's tag bit */ 986 if (!root_tag_get(root, tag)) 987 root_tag_set(root, tag); 988 989 return node; 990 } 991 EXPORT_SYMBOL(radix_tree_tag_set); 992 993 static void node_tag_clear(struct radix_tree_root *root, 994 struct radix_tree_node *node, 995 unsigned int tag, unsigned int offset) 996 { 997 while (node) { 998 if (!tag_get(node, tag, offset)) 999 return; 1000 tag_clear(node, tag, offset); 1001 if (any_tag_set(node, tag)) 1002 return; 1003 1004 offset = node->offset; 1005 node = node->parent; 1006 } 1007 1008 /* clear the root's tag bit */ 1009 if (root_tag_get(root, tag)) 1010 root_tag_clear(root, tag); 1011 } 1012 1013 /** 1014 * radix_tree_tag_clear - clear a tag on a radix tree node 1015 * @root: radix tree root 1016 * @index: index key 1017 * @tag: tag index 1018 * 1019 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 1020 * corresponding to @index in the radix tree. If this causes 1021 * the leaf node to have no tags set then clear the tag in the 1022 * next-to-leaf node, etc. 1023 * 1024 * Returns the address of the tagged item on success, else NULL. ie: 1025 * has the same return value and semantics as radix_tree_lookup(). 1026 */ 1027 void *radix_tree_tag_clear(struct radix_tree_root *root, 1028 unsigned long index, unsigned int tag) 1029 { 1030 struct radix_tree_node *node, *parent; 1031 unsigned long maxindex; 1032 int offset; 1033 1034 radix_tree_load_root(root, &node, &maxindex); 1035 if (index > maxindex) 1036 return NULL; 1037 1038 parent = NULL; 1039 1040 while (radix_tree_is_internal_node(node)) { 1041 parent = entry_to_node(node); 1042 offset = radix_tree_descend(parent, &node, index); 1043 } 1044 1045 if (node) 1046 node_tag_clear(root, parent, tag, offset); 1047 1048 return node; 1049 } 1050 EXPORT_SYMBOL(radix_tree_tag_clear); 1051 1052 /** 1053 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry 1054 * @root: radix tree root 1055 * @iter: iterator state 1056 * @tag: tag to clear 1057 */ 1058 void radix_tree_iter_tag_clear(struct radix_tree_root *root, 1059 const struct radix_tree_iter *iter, unsigned int tag) 1060 { 1061 node_tag_clear(root, iter->node, tag, iter_offset(iter)); 1062 } 1063 1064 /** 1065 * radix_tree_tag_get - get a tag on a radix tree node 1066 * @root: radix tree root 1067 * @index: index key 1068 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 1069 * 1070 * Return values: 1071 * 1072 * 0: tag not present or not set 1073 * 1: tag set 1074 * 1075 * Note that the return value of this function may not be relied on, even if 1076 * the RCU lock is held, unless tag modification and node deletion are excluded 1077 * from concurrency. 1078 */ 1079 int radix_tree_tag_get(const struct radix_tree_root *root, 1080 unsigned long index, unsigned int tag) 1081 { 1082 struct radix_tree_node *node, *parent; 1083 unsigned long maxindex; 1084 1085 if (!root_tag_get(root, tag)) 1086 return 0; 1087 1088 radix_tree_load_root(root, &node, &maxindex); 1089 if (index > maxindex) 1090 return 0; 1091 1092 while (radix_tree_is_internal_node(node)) { 1093 unsigned offset; 1094 1095 parent = entry_to_node(node); 1096 offset = radix_tree_descend(parent, &node, index); 1097 1098 if (!tag_get(parent, tag, offset)) 1099 return 0; 1100 if (node == RADIX_TREE_RETRY) 1101 break; 1102 } 1103 1104 return 1; 1105 } 1106 EXPORT_SYMBOL(radix_tree_tag_get); 1107 1108 /* Construct iter->tags bit-mask from node->tags[tag] array */ 1109 static void set_iter_tags(struct radix_tree_iter *iter, 1110 struct radix_tree_node *node, unsigned offset, 1111 unsigned tag) 1112 { 1113 unsigned tag_long = offset / BITS_PER_LONG; 1114 unsigned tag_bit = offset % BITS_PER_LONG; 1115 1116 if (!node) { 1117 iter->tags = 1; 1118 return; 1119 } 1120 1121 iter->tags = node->tags[tag][tag_long] >> tag_bit; 1122 1123 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ 1124 if (tag_long < RADIX_TREE_TAG_LONGS - 1) { 1125 /* Pick tags from next element */ 1126 if (tag_bit) 1127 iter->tags |= node->tags[tag][tag_long + 1] << 1128 (BITS_PER_LONG - tag_bit); 1129 /* Clip chunk size, here only BITS_PER_LONG tags */ 1130 iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); 1131 } 1132 } 1133 1134 void __rcu **radix_tree_iter_resume(void __rcu **slot, 1135 struct radix_tree_iter *iter) 1136 { 1137 slot++; 1138 iter->index = __radix_tree_iter_add(iter, 1); 1139 iter->next_index = iter->index; 1140 iter->tags = 0; 1141 return NULL; 1142 } 1143 EXPORT_SYMBOL(radix_tree_iter_resume); 1144 1145 /** 1146 * radix_tree_next_chunk - find next chunk of slots for iteration 1147 * 1148 * @root: radix tree root 1149 * @iter: iterator state 1150 * @flags: RADIX_TREE_ITER_* flags and tag index 1151 * Returns: pointer to chunk first slot, or NULL if iteration is over 1152 */ 1153 void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, 1154 struct radix_tree_iter *iter, unsigned flags) 1155 { 1156 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; 1157 struct radix_tree_node *node, *child; 1158 unsigned long index, offset, maxindex; 1159 1160 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) 1161 return NULL; 1162 1163 /* 1164 * Catch next_index overflow after ~0UL. iter->index never overflows 1165 * during iterating; it can be zero only at the beginning. 1166 * And we cannot overflow iter->next_index in a single step, 1167 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 1168 * 1169 * This condition also used by radix_tree_next_slot() to stop 1170 * contiguous iterating, and forbid switching to the next chunk. 1171 */ 1172 index = iter->next_index; 1173 if (!index && iter->index) 1174 return NULL; 1175 1176 restart: 1177 radix_tree_load_root(root, &child, &maxindex); 1178 if (index > maxindex) 1179 return NULL; 1180 if (!child) 1181 return NULL; 1182 1183 if (!radix_tree_is_internal_node(child)) { 1184 /* Single-slot tree */ 1185 iter->index = index; 1186 iter->next_index = maxindex + 1; 1187 iter->tags = 1; 1188 iter->node = NULL; 1189 return (void __rcu **)&root->xa_head; 1190 } 1191 1192 do { 1193 node = entry_to_node(child); 1194 offset = radix_tree_descend(node, &child, index); 1195 1196 if ((flags & RADIX_TREE_ITER_TAGGED) ? 1197 !tag_get(node, tag, offset) : !child) { 1198 /* Hole detected */ 1199 if (flags & RADIX_TREE_ITER_CONTIG) 1200 return NULL; 1201 1202 if (flags & RADIX_TREE_ITER_TAGGED) 1203 offset = radix_tree_find_next_bit(node, tag, 1204 offset + 1); 1205 else 1206 while (++offset < RADIX_TREE_MAP_SIZE) { 1207 void *slot = rcu_dereference_raw( 1208 node->slots[offset]); 1209 if (slot) 1210 break; 1211 } 1212 index &= ~node_maxindex(node); 1213 index += offset << node->shift; 1214 /* Overflow after ~0UL */ 1215 if (!index) 1216 return NULL; 1217 if (offset == RADIX_TREE_MAP_SIZE) 1218 goto restart; 1219 child = rcu_dereference_raw(node->slots[offset]); 1220 } 1221 1222 if (!child) 1223 goto restart; 1224 if (child == RADIX_TREE_RETRY) 1225 break; 1226 } while (node->shift && radix_tree_is_internal_node(child)); 1227 1228 /* Update the iterator state */ 1229 iter->index = (index &~ node_maxindex(node)) | offset; 1230 iter->next_index = (index | node_maxindex(node)) + 1; 1231 iter->node = node; 1232 1233 if (flags & RADIX_TREE_ITER_TAGGED) 1234 set_iter_tags(iter, node, offset, tag); 1235 1236 return node->slots + offset; 1237 } 1238 EXPORT_SYMBOL(radix_tree_next_chunk); 1239 1240 /** 1241 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 1242 * @root: radix tree root 1243 * @results: where the results of the lookup are placed 1244 * @first_index: start the lookup from this key 1245 * @max_items: place up to this many items at *results 1246 * 1247 * Performs an index-ascending scan of the tree for present items. Places 1248 * them at *@results and returns the number of items which were placed at 1249 * *@results. 1250 * 1251 * The implementation is naive. 1252 * 1253 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 1254 * rcu_read_lock. In this case, rather than the returned results being 1255 * an atomic snapshot of the tree at a single point in time, the 1256 * semantics of an RCU protected gang lookup are as though multiple 1257 * radix_tree_lookups have been issued in individual locks, and results 1258 * stored in 'results'. 1259 */ 1260 unsigned int 1261 radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, 1262 unsigned long first_index, unsigned int max_items) 1263 { 1264 struct radix_tree_iter iter; 1265 void __rcu **slot; 1266 unsigned int ret = 0; 1267 1268 if (unlikely(!max_items)) 1269 return 0; 1270 1271 radix_tree_for_each_slot(slot, root, &iter, first_index) { 1272 results[ret] = rcu_dereference_raw(*slot); 1273 if (!results[ret]) 1274 continue; 1275 if (radix_tree_is_internal_node(results[ret])) { 1276 slot = radix_tree_iter_retry(&iter); 1277 continue; 1278 } 1279 if (++ret == max_items) 1280 break; 1281 } 1282 1283 return ret; 1284 } 1285 EXPORT_SYMBOL(radix_tree_gang_lookup); 1286 1287 /** 1288 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 1289 * based on a tag 1290 * @root: radix tree root 1291 * @results: where the results of the lookup are placed 1292 * @first_index: start the lookup from this key 1293 * @max_items: place up to this many items at *results 1294 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1295 * 1296 * Performs an index-ascending scan of the tree for present items which 1297 * have the tag indexed by @tag set. Places the items at *@results and 1298 * returns the number of items which were placed at *@results. 1299 */ 1300 unsigned int 1301 radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, 1302 unsigned long first_index, unsigned int max_items, 1303 unsigned int tag) 1304 { 1305 struct radix_tree_iter iter; 1306 void __rcu **slot; 1307 unsigned int ret = 0; 1308 1309 if (unlikely(!max_items)) 1310 return 0; 1311 1312 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1313 results[ret] = rcu_dereference_raw(*slot); 1314 if (!results[ret]) 1315 continue; 1316 if (radix_tree_is_internal_node(results[ret])) { 1317 slot = radix_tree_iter_retry(&iter); 1318 continue; 1319 } 1320 if (++ret == max_items) 1321 break; 1322 } 1323 1324 return ret; 1325 } 1326 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 1327 1328 /** 1329 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 1330 * radix tree based on a tag 1331 * @root: radix tree root 1332 * @results: where the results of the lookup are placed 1333 * @first_index: start the lookup from this key 1334 * @max_items: place up to this many items at *results 1335 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1336 * 1337 * Performs an index-ascending scan of the tree for present items which 1338 * have the tag indexed by @tag set. Places the slots at *@results and 1339 * returns the number of slots which were placed at *@results. 1340 */ 1341 unsigned int 1342 radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, 1343 void __rcu ***results, unsigned long first_index, 1344 unsigned int max_items, unsigned int tag) 1345 { 1346 struct radix_tree_iter iter; 1347 void __rcu **slot; 1348 unsigned int ret = 0; 1349 1350 if (unlikely(!max_items)) 1351 return 0; 1352 1353 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1354 results[ret] = slot; 1355 if (++ret == max_items) 1356 break; 1357 } 1358 1359 return ret; 1360 } 1361 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1362 1363 static bool __radix_tree_delete(struct radix_tree_root *root, 1364 struct radix_tree_node *node, void __rcu **slot) 1365 { 1366 void *old = rcu_dereference_raw(*slot); 1367 int values = xa_is_value(old) ? -1 : 0; 1368 unsigned offset = get_slot_offset(node, slot); 1369 int tag; 1370 1371 if (is_idr(root)) 1372 node_tag_set(root, node, IDR_FREE, offset); 1373 else 1374 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1375 node_tag_clear(root, node, tag, offset); 1376 1377 replace_slot(slot, NULL, node, -1, values); 1378 return node && delete_node(root, node); 1379 } 1380 1381 /** 1382 * radix_tree_iter_delete - delete the entry at this iterator position 1383 * @root: radix tree root 1384 * @iter: iterator state 1385 * @slot: pointer to slot 1386 * 1387 * Delete the entry at the position currently pointed to by the iterator. 1388 * This may result in the current node being freed; if it is, the iterator 1389 * is advanced so that it will not reference the freed memory. This 1390 * function may be called without any locking if there are no other threads 1391 * which can access this tree. 1392 */ 1393 void radix_tree_iter_delete(struct radix_tree_root *root, 1394 struct radix_tree_iter *iter, void __rcu **slot) 1395 { 1396 if (__radix_tree_delete(root, iter->node, slot)) 1397 iter->index = iter->next_index; 1398 } 1399 EXPORT_SYMBOL(radix_tree_iter_delete); 1400 1401 /** 1402 * radix_tree_delete_item - delete an item from a radix tree 1403 * @root: radix tree root 1404 * @index: index key 1405 * @item: expected item 1406 * 1407 * Remove @item at @index from the radix tree rooted at @root. 1408 * 1409 * Return: the deleted entry, or %NULL if it was not present 1410 * or the entry at the given @index was not @item. 1411 */ 1412 void *radix_tree_delete_item(struct radix_tree_root *root, 1413 unsigned long index, void *item) 1414 { 1415 struct radix_tree_node *node = NULL; 1416 void __rcu **slot = NULL; 1417 void *entry; 1418 1419 entry = __radix_tree_lookup(root, index, &node, &slot); 1420 if (!slot) 1421 return NULL; 1422 if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, 1423 get_slot_offset(node, slot)))) 1424 return NULL; 1425 1426 if (item && entry != item) 1427 return NULL; 1428 1429 __radix_tree_delete(root, node, slot); 1430 1431 return entry; 1432 } 1433 EXPORT_SYMBOL(radix_tree_delete_item); 1434 1435 /** 1436 * radix_tree_delete - delete an entry from a radix tree 1437 * @root: radix tree root 1438 * @index: index key 1439 * 1440 * Remove the entry at @index from the radix tree rooted at @root. 1441 * 1442 * Return: The deleted entry, or %NULL if it was not present. 1443 */ 1444 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1445 { 1446 return radix_tree_delete_item(root, index, NULL); 1447 } 1448 EXPORT_SYMBOL(radix_tree_delete); 1449 1450 /** 1451 * radix_tree_tagged - test whether any items in the tree are tagged 1452 * @root: radix tree root 1453 * @tag: tag to test 1454 */ 1455 int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) 1456 { 1457 return root_tag_get(root, tag); 1458 } 1459 EXPORT_SYMBOL(radix_tree_tagged); 1460 1461 /** 1462 * idr_preload - preload for idr_alloc() 1463 * @gfp_mask: allocation mask to use for preloading 1464 * 1465 * Preallocate memory to use for the next call to idr_alloc(). This function 1466 * returns with preemption disabled. It will be enabled by idr_preload_end(). 1467 */ 1468 void idr_preload(gfp_t gfp_mask) 1469 { 1470 if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) 1471 local_lock(&radix_tree_preloads.lock); 1472 } 1473 EXPORT_SYMBOL(idr_preload); 1474 1475 void __rcu **idr_get_free(struct radix_tree_root *root, 1476 struct radix_tree_iter *iter, gfp_t gfp, 1477 unsigned long max) 1478 { 1479 struct radix_tree_node *node = NULL, *child; 1480 void __rcu **slot = (void __rcu **)&root->xa_head; 1481 unsigned long maxindex, start = iter->next_index; 1482 unsigned int shift, offset = 0; 1483 1484 grow: 1485 shift = radix_tree_load_root(root, &child, &maxindex); 1486 if (!radix_tree_tagged(root, IDR_FREE)) 1487 start = max(start, maxindex + 1); 1488 if (start > max) 1489 return ERR_PTR(-ENOSPC); 1490 1491 if (start > maxindex) { 1492 int error = radix_tree_extend(root, gfp, start, shift); 1493 if (error < 0) 1494 return ERR_PTR(error); 1495 shift = error; 1496 child = rcu_dereference_raw(root->xa_head); 1497 } 1498 if (start == 0 && shift == 0) 1499 shift = RADIX_TREE_MAP_SHIFT; 1500 1501 while (shift) { 1502 shift -= RADIX_TREE_MAP_SHIFT; 1503 if (child == NULL) { 1504 /* Have to add a child node. */ 1505 child = radix_tree_node_alloc(gfp, node, root, shift, 1506 offset, 0, 0); 1507 if (!child) 1508 return ERR_PTR(-ENOMEM); 1509 all_tag_set(child, IDR_FREE); 1510 rcu_assign_pointer(*slot, node_to_entry(child)); 1511 if (node) 1512 node->count++; 1513 } else if (!radix_tree_is_internal_node(child)) 1514 break; 1515 1516 node = entry_to_node(child); 1517 offset = radix_tree_descend(node, &child, start); 1518 if (!tag_get(node, IDR_FREE, offset)) { 1519 offset = radix_tree_find_next_bit(node, IDR_FREE, 1520 offset + 1); 1521 start = next_index(start, node, offset); 1522 if (start > max || start == 0) 1523 return ERR_PTR(-ENOSPC); 1524 while (offset == RADIX_TREE_MAP_SIZE) { 1525 offset = node->offset + 1; 1526 node = node->parent; 1527 if (!node) 1528 goto grow; 1529 shift = node->shift; 1530 } 1531 child = rcu_dereference_raw(node->slots[offset]); 1532 } 1533 slot = &node->slots[offset]; 1534 } 1535 1536 iter->index = start; 1537 if (node) 1538 iter->next_index = 1 + min(max, (start | node_maxindex(node))); 1539 else 1540 iter->next_index = 1; 1541 iter->node = node; 1542 set_iter_tags(iter, node, offset, IDR_FREE); 1543 1544 return slot; 1545 } 1546 1547 /** 1548 * idr_destroy - release all internal memory from an IDR 1549 * @idr: idr handle 1550 * 1551 * After this function is called, the IDR is empty, and may be reused or 1552 * the data structure containing it may be freed. 1553 * 1554 * A typical clean-up sequence for objects stored in an idr tree will use 1555 * idr_for_each() to free all objects, if necessary, then idr_destroy() to 1556 * free the memory used to keep track of those objects. 1557 */ 1558 void idr_destroy(struct idr *idr) 1559 { 1560 struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); 1561 if (radix_tree_is_internal_node(node)) 1562 radix_tree_free_nodes(node); 1563 idr->idr_rt.xa_head = NULL; 1564 root_tag_set(&idr->idr_rt, IDR_FREE); 1565 } 1566 EXPORT_SYMBOL(idr_destroy); 1567 1568 static void 1569 radix_tree_node_ctor(void *arg) 1570 { 1571 struct radix_tree_node *node = arg; 1572 1573 memset(node, 0, sizeof(*node)); 1574 INIT_LIST_HEAD(&node->private_list); 1575 } 1576 1577 static int radix_tree_cpu_dead(unsigned int cpu) 1578 { 1579 struct radix_tree_preload *rtp; 1580 struct radix_tree_node *node; 1581 1582 /* Free per-cpu pool of preloaded nodes */ 1583 rtp = &per_cpu(radix_tree_preloads, cpu); 1584 while (rtp->nr) { 1585 node = rtp->nodes; 1586 rtp->nodes = node->parent; 1587 kmem_cache_free(radix_tree_node_cachep, node); 1588 rtp->nr--; 1589 } 1590 return 0; 1591 } 1592 1593 void __init radix_tree_init(void) 1594 { 1595 int ret; 1596 1597 BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); 1598 BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); 1599 BUILD_BUG_ON(XA_CHUNK_SIZE > 255); 1600 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1601 sizeof(struct radix_tree_node), 0, 1602 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1603 radix_tree_node_ctor); 1604 ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", 1605 NULL, radix_tree_cpu_dead); 1606 WARN_ON(ret < 0); 1607 } 1608