1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter 5 * Copyright (C) 2006 Nick Piggin 6 * Copyright (C) 2012 Konstantin Khlebnikov 7 * Copyright (C) 2016 Intel, Matthew Wilcox 8 * Copyright (C) 2016 Intel, Ross Zwisler 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License as 12 * published by the Free Software Foundation; either version 2, or (at 13 * your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, but 16 * WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 23 */ 24 25 #include <linux/bitmap.h> 26 #include <linux/bitops.h> 27 #include <linux/bug.h> 28 #include <linux/cpu.h> 29 #include <linux/errno.h> 30 #include <linux/export.h> 31 #include <linux/idr.h> 32 #include <linux/init.h> 33 #include <linux/kernel.h> 34 #include <linux/kmemleak.h> 35 #include <linux/percpu.h> 36 #include <linux/preempt.h> /* in_interrupt() */ 37 #include <linux/radix-tree.h> 38 #include <linux/rcupdate.h> 39 #include <linux/slab.h> 40 #include <linux/string.h> 41 #include <linux/xarray.h> 42 43 44 /* 45 * Radix tree node cache. 46 */ 47 struct kmem_cache *radix_tree_node_cachep; 48 49 /* 50 * The radix tree is variable-height, so an insert operation not only has 51 * to build the branch to its corresponding item, it also has to build the 52 * branch to existing items if the size has to be increased (by 53 * radix_tree_extend). 54 * 55 * The worst case is a zero height tree with just a single item at index 0, 56 * and then inserting an item at index ULONG_MAX. This requires 2 new branches 57 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. 58 * Hence: 59 */ 60 #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) 61 62 /* 63 * The IDR does not have to be as high as the radix tree since it uses 64 * signed integers, not unsigned longs. 65 */ 66 #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) 67 #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ 68 RADIX_TREE_MAP_SHIFT)) 69 #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) 70 71 /* 72 * The IDA is even shorter since it uses a bitmap at the last level. 73 */ 74 #define IDA_INDEX_BITS (8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS)) 75 #define IDA_MAX_PATH (DIV_ROUND_UP(IDA_INDEX_BITS, \ 76 RADIX_TREE_MAP_SHIFT)) 77 #define IDA_PRELOAD_SIZE (IDA_MAX_PATH * 2 - 1) 78 79 /* 80 * Per-cpu pool of preloaded nodes 81 */ 82 struct radix_tree_preload { 83 unsigned nr; 84 /* nodes->parent points to next preallocated node */ 85 struct radix_tree_node *nodes; 86 }; 87 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 88 89 static inline struct radix_tree_node *entry_to_node(void *ptr) 90 { 91 return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); 92 } 93 94 static inline void *node_to_entry(void *ptr) 95 { 96 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 97 } 98 99 #define RADIX_TREE_RETRY XA_RETRY_ENTRY 100 101 static inline unsigned long 102 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) 103 { 104 return parent ? slot - parent->slots : 0; 105 } 106 107 static unsigned int radix_tree_descend(const struct radix_tree_node *parent, 108 struct radix_tree_node **nodep, unsigned long index) 109 { 110 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; 111 void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); 112 113 *nodep = (void *)entry; 114 return offset; 115 } 116 117 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) 118 { 119 return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK); 120 } 121 122 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 123 int offset) 124 { 125 __set_bit(offset, node->tags[tag]); 126 } 127 128 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 129 int offset) 130 { 131 __clear_bit(offset, node->tags[tag]); 132 } 133 134 static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, 135 int offset) 136 { 137 return test_bit(offset, node->tags[tag]); 138 } 139 140 static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) 141 { 142 root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); 143 } 144 145 static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) 146 { 147 root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); 148 } 149 150 static inline void root_tag_clear_all(struct radix_tree_root *root) 151 { 152 root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1); 153 } 154 155 static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) 156 { 157 return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT)); 158 } 159 160 static inline unsigned root_tags_get(const struct radix_tree_root *root) 161 { 162 return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT; 163 } 164 165 static inline bool is_idr(const struct radix_tree_root *root) 166 { 167 return !!(root->xa_flags & ROOT_IS_IDR); 168 } 169 170 /* 171 * Returns 1 if any slot in the node has this tag set. 172 * Otherwise returns 0. 173 */ 174 static inline int any_tag_set(const struct radix_tree_node *node, 175 unsigned int tag) 176 { 177 unsigned idx; 178 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 179 if (node->tags[tag][idx]) 180 return 1; 181 } 182 return 0; 183 } 184 185 static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) 186 { 187 bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); 188 } 189 190 /** 191 * radix_tree_find_next_bit - find the next set bit in a memory region 192 * 193 * @addr: The address to base the search on 194 * @size: The bitmap size in bits 195 * @offset: The bitnumber to start searching at 196 * 197 * Unrollable variant of find_next_bit() for constant size arrays. 198 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. 199 * Returns next bit offset, or size if nothing found. 200 */ 201 static __always_inline unsigned long 202 radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, 203 unsigned long offset) 204 { 205 const unsigned long *addr = node->tags[tag]; 206 207 if (offset < RADIX_TREE_MAP_SIZE) { 208 unsigned long tmp; 209 210 addr += offset / BITS_PER_LONG; 211 tmp = *addr >> (offset % BITS_PER_LONG); 212 if (tmp) 213 return __ffs(tmp) + offset; 214 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); 215 while (offset < RADIX_TREE_MAP_SIZE) { 216 tmp = *++addr; 217 if (tmp) 218 return __ffs(tmp) + offset; 219 offset += BITS_PER_LONG; 220 } 221 } 222 return RADIX_TREE_MAP_SIZE; 223 } 224 225 static unsigned int iter_offset(const struct radix_tree_iter *iter) 226 { 227 return iter->index & RADIX_TREE_MAP_MASK; 228 } 229 230 /* 231 * The maximum index which can be stored in a radix tree 232 */ 233 static inline unsigned long shift_maxindex(unsigned int shift) 234 { 235 return (RADIX_TREE_MAP_SIZE << shift) - 1; 236 } 237 238 static inline unsigned long node_maxindex(const struct radix_tree_node *node) 239 { 240 return shift_maxindex(node->shift); 241 } 242 243 static unsigned long next_index(unsigned long index, 244 const struct radix_tree_node *node, 245 unsigned long offset) 246 { 247 return (index & ~node_maxindex(node)) + (offset << node->shift); 248 } 249 250 /* 251 * This assumes that the caller has performed appropriate preallocation, and 252 * that the caller has pinned this thread of control to the current CPU. 253 */ 254 static struct radix_tree_node * 255 radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, 256 struct radix_tree_root *root, 257 unsigned int shift, unsigned int offset, 258 unsigned int count, unsigned int nr_values) 259 { 260 struct radix_tree_node *ret = NULL; 261 262 /* 263 * Preload code isn't irq safe and it doesn't make sense to use 264 * preloading during an interrupt anyway as all the allocations have 265 * to be atomic. So just do normal allocation when in interrupt. 266 */ 267 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { 268 struct radix_tree_preload *rtp; 269 270 /* 271 * Even if the caller has preloaded, try to allocate from the 272 * cache first for the new node to get accounted to the memory 273 * cgroup. 274 */ 275 ret = kmem_cache_alloc(radix_tree_node_cachep, 276 gfp_mask | __GFP_NOWARN); 277 if (ret) 278 goto out; 279 280 /* 281 * Provided the caller has preloaded here, we will always 282 * succeed in getting a node here (and never reach 283 * kmem_cache_alloc) 284 */ 285 rtp = this_cpu_ptr(&radix_tree_preloads); 286 if (rtp->nr) { 287 ret = rtp->nodes; 288 rtp->nodes = ret->parent; 289 rtp->nr--; 290 } 291 /* 292 * Update the allocation stack trace as this is more useful 293 * for debugging. 294 */ 295 kmemleak_update_trace(ret); 296 goto out; 297 } 298 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 299 out: 300 BUG_ON(radix_tree_is_internal_node(ret)); 301 if (ret) { 302 ret->shift = shift; 303 ret->offset = offset; 304 ret->count = count; 305 ret->nr_values = nr_values; 306 ret->parent = parent; 307 ret->array = root; 308 } 309 return ret; 310 } 311 312 void radix_tree_node_rcu_free(struct rcu_head *head) 313 { 314 struct radix_tree_node *node = 315 container_of(head, struct radix_tree_node, rcu_head); 316 317 /* 318 * Must only free zeroed nodes into the slab. We can be left with 319 * non-NULL entries by radix_tree_free_nodes, so clear the entries 320 * and tags here. 321 */ 322 memset(node->slots, 0, sizeof(node->slots)); 323 memset(node->tags, 0, sizeof(node->tags)); 324 INIT_LIST_HEAD(&node->private_list); 325 326 kmem_cache_free(radix_tree_node_cachep, node); 327 } 328 329 static inline void 330 radix_tree_node_free(struct radix_tree_node *node) 331 { 332 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 333 } 334 335 /* 336 * Load up this CPU's radix_tree_node buffer with sufficient objects to 337 * ensure that the addition of a single element in the tree cannot fail. On 338 * success, return zero, with preemption disabled. On error, return -ENOMEM 339 * with preemption not disabled. 340 * 341 * To make use of this facility, the radix tree must be initialised without 342 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 343 */ 344 static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) 345 { 346 struct radix_tree_preload *rtp; 347 struct radix_tree_node *node; 348 int ret = -ENOMEM; 349 350 /* 351 * Nodes preloaded by one cgroup can be be used by another cgroup, so 352 * they should never be accounted to any particular memory cgroup. 353 */ 354 gfp_mask &= ~__GFP_ACCOUNT; 355 356 preempt_disable(); 357 rtp = this_cpu_ptr(&radix_tree_preloads); 358 while (rtp->nr < nr) { 359 preempt_enable(); 360 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 361 if (node == NULL) 362 goto out; 363 preempt_disable(); 364 rtp = this_cpu_ptr(&radix_tree_preloads); 365 if (rtp->nr < nr) { 366 node->parent = rtp->nodes; 367 rtp->nodes = node; 368 rtp->nr++; 369 } else { 370 kmem_cache_free(radix_tree_node_cachep, node); 371 } 372 } 373 ret = 0; 374 out: 375 return ret; 376 } 377 378 /* 379 * Load up this CPU's radix_tree_node buffer with sufficient objects to 380 * ensure that the addition of a single element in the tree cannot fail. On 381 * success, return zero, with preemption disabled. On error, return -ENOMEM 382 * with preemption not disabled. 383 * 384 * To make use of this facility, the radix tree must be initialised without 385 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 386 */ 387 int radix_tree_preload(gfp_t gfp_mask) 388 { 389 /* Warn on non-sensical use... */ 390 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); 391 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 392 } 393 EXPORT_SYMBOL(radix_tree_preload); 394 395 /* 396 * The same as above function, except we don't guarantee preloading happens. 397 * We do it, if we decide it helps. On success, return zero with preemption 398 * disabled. On error, return -ENOMEM with preemption not disabled. 399 */ 400 int radix_tree_maybe_preload(gfp_t gfp_mask) 401 { 402 if (gfpflags_allow_blocking(gfp_mask)) 403 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 404 /* Preloading doesn't help anything with this gfp mask, skip it */ 405 preempt_disable(); 406 return 0; 407 } 408 EXPORT_SYMBOL(radix_tree_maybe_preload); 409 410 static unsigned radix_tree_load_root(const struct radix_tree_root *root, 411 struct radix_tree_node **nodep, unsigned long *maxindex) 412 { 413 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 414 415 *nodep = node; 416 417 if (likely(radix_tree_is_internal_node(node))) { 418 node = entry_to_node(node); 419 *maxindex = node_maxindex(node); 420 return node->shift + RADIX_TREE_MAP_SHIFT; 421 } 422 423 *maxindex = 0; 424 return 0; 425 } 426 427 /* 428 * Extend a radix tree so it can store key @index. 429 */ 430 static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, 431 unsigned long index, unsigned int shift) 432 { 433 void *entry; 434 unsigned int maxshift; 435 int tag; 436 437 /* Figure out what the shift should be. */ 438 maxshift = shift; 439 while (index > shift_maxindex(maxshift)) 440 maxshift += RADIX_TREE_MAP_SHIFT; 441 442 entry = rcu_dereference_raw(root->xa_head); 443 if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) 444 goto out; 445 446 do { 447 struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, 448 root, shift, 0, 1, 0); 449 if (!node) 450 return -ENOMEM; 451 452 if (is_idr(root)) { 453 all_tag_set(node, IDR_FREE); 454 if (!root_tag_get(root, IDR_FREE)) { 455 tag_clear(node, IDR_FREE, 0); 456 root_tag_set(root, IDR_FREE); 457 } 458 } else { 459 /* Propagate the aggregated tag info to the new child */ 460 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 461 if (root_tag_get(root, tag)) 462 tag_set(node, tag, 0); 463 } 464 } 465 466 BUG_ON(shift > BITS_PER_LONG); 467 if (radix_tree_is_internal_node(entry)) { 468 entry_to_node(entry)->parent = node; 469 } else if (xa_is_value(entry)) { 470 /* Moving a value entry root->xa_head to a node */ 471 node->nr_values = 1; 472 } 473 /* 474 * entry was already in the radix tree, so we do not need 475 * rcu_assign_pointer here 476 */ 477 node->slots[0] = (void __rcu *)entry; 478 entry = node_to_entry(node); 479 rcu_assign_pointer(root->xa_head, entry); 480 shift += RADIX_TREE_MAP_SHIFT; 481 } while (shift <= maxshift); 482 out: 483 return maxshift + RADIX_TREE_MAP_SHIFT; 484 } 485 486 /** 487 * radix_tree_shrink - shrink radix tree to minimum height 488 * @root radix tree root 489 */ 490 static inline bool radix_tree_shrink(struct radix_tree_root *root) 491 { 492 bool shrunk = false; 493 494 for (;;) { 495 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 496 struct radix_tree_node *child; 497 498 if (!radix_tree_is_internal_node(node)) 499 break; 500 node = entry_to_node(node); 501 502 /* 503 * The candidate node has more than one child, or its child 504 * is not at the leftmost slot, we cannot shrink. 505 */ 506 if (node->count != 1) 507 break; 508 child = rcu_dereference_raw(node->slots[0]); 509 if (!child) 510 break; 511 512 /* 513 * For an IDR, we must not shrink entry 0 into the root in 514 * case somebody calls idr_replace() with a pointer that 515 * appears to be an internal entry 516 */ 517 if (!node->shift && is_idr(root)) 518 break; 519 520 if (radix_tree_is_internal_node(child)) 521 entry_to_node(child)->parent = NULL; 522 523 /* 524 * We don't need rcu_assign_pointer(), since we are simply 525 * moving the node from one part of the tree to another: if it 526 * was safe to dereference the old pointer to it 527 * (node->slots[0]), it will be safe to dereference the new 528 * one (root->xa_head) as far as dependent read barriers go. 529 */ 530 root->xa_head = (void __rcu *)child; 531 if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) 532 root_tag_clear(root, IDR_FREE); 533 534 /* 535 * We have a dilemma here. The node's slot[0] must not be 536 * NULLed in case there are concurrent lookups expecting to 537 * find the item. However if this was a bottom-level node, 538 * then it may be subject to the slot pointer being visible 539 * to callers dereferencing it. If item corresponding to 540 * slot[0] is subsequently deleted, these callers would expect 541 * their slot to become empty sooner or later. 542 * 543 * For example, lockless pagecache will look up a slot, deref 544 * the page pointer, and if the page has 0 refcount it means it 545 * was concurrently deleted from pagecache so try the deref 546 * again. Fortunately there is already a requirement for logic 547 * to retry the entire slot lookup -- the indirect pointer 548 * problem (replacing direct root node with an indirect pointer 549 * also results in a stale slot). So tag the slot as indirect 550 * to force callers to retry. 551 */ 552 node->count = 0; 553 if (!radix_tree_is_internal_node(child)) { 554 node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; 555 } 556 557 WARN_ON_ONCE(!list_empty(&node->private_list)); 558 radix_tree_node_free(node); 559 shrunk = true; 560 } 561 562 return shrunk; 563 } 564 565 static bool delete_node(struct radix_tree_root *root, 566 struct radix_tree_node *node) 567 { 568 bool deleted = false; 569 570 do { 571 struct radix_tree_node *parent; 572 573 if (node->count) { 574 if (node_to_entry(node) == 575 rcu_dereference_raw(root->xa_head)) 576 deleted |= radix_tree_shrink(root); 577 return deleted; 578 } 579 580 parent = node->parent; 581 if (parent) { 582 parent->slots[node->offset] = NULL; 583 parent->count--; 584 } else { 585 /* 586 * Shouldn't the tags already have all been cleared 587 * by the caller? 588 */ 589 if (!is_idr(root)) 590 root_tag_clear_all(root); 591 root->xa_head = NULL; 592 } 593 594 WARN_ON_ONCE(!list_empty(&node->private_list)); 595 radix_tree_node_free(node); 596 deleted = true; 597 598 node = parent; 599 } while (node); 600 601 return deleted; 602 } 603 604 /** 605 * __radix_tree_create - create a slot in a radix tree 606 * @root: radix tree root 607 * @index: index key 608 * @nodep: returns node 609 * @slotp: returns slot 610 * 611 * Create, if necessary, and return the node and slot for an item 612 * at position @index in the radix tree @root. 613 * 614 * Until there is more than one item in the tree, no nodes are 615 * allocated and @root->xa_head is used as a direct slot instead of 616 * pointing to a node, in which case *@nodep will be NULL. 617 * 618 * Returns -ENOMEM, or 0 for success. 619 */ 620 static int __radix_tree_create(struct radix_tree_root *root, 621 unsigned long index, struct radix_tree_node **nodep, 622 void __rcu ***slotp) 623 { 624 struct radix_tree_node *node = NULL, *child; 625 void __rcu **slot = (void __rcu **)&root->xa_head; 626 unsigned long maxindex; 627 unsigned int shift, offset = 0; 628 unsigned long max = index; 629 gfp_t gfp = root_gfp_mask(root); 630 631 shift = radix_tree_load_root(root, &child, &maxindex); 632 633 /* Make sure the tree is high enough. */ 634 if (max > maxindex) { 635 int error = radix_tree_extend(root, gfp, max, shift); 636 if (error < 0) 637 return error; 638 shift = error; 639 child = rcu_dereference_raw(root->xa_head); 640 } 641 642 while (shift > 0) { 643 shift -= RADIX_TREE_MAP_SHIFT; 644 if (child == NULL) { 645 /* Have to add a child node. */ 646 child = radix_tree_node_alloc(gfp, node, root, shift, 647 offset, 0, 0); 648 if (!child) 649 return -ENOMEM; 650 rcu_assign_pointer(*slot, node_to_entry(child)); 651 if (node) 652 node->count++; 653 } else if (!radix_tree_is_internal_node(child)) 654 break; 655 656 /* Go a level down */ 657 node = entry_to_node(child); 658 offset = radix_tree_descend(node, &child, index); 659 slot = &node->slots[offset]; 660 } 661 662 if (nodep) 663 *nodep = node; 664 if (slotp) 665 *slotp = slot; 666 return 0; 667 } 668 669 /* 670 * Free any nodes below this node. The tree is presumed to not need 671 * shrinking, and any user data in the tree is presumed to not need a 672 * destructor called on it. If we need to add a destructor, we can 673 * add that functionality later. Note that we may not clear tags or 674 * slots from the tree as an RCU walker may still have a pointer into 675 * this subtree. We could replace the entries with RADIX_TREE_RETRY, 676 * but we'll still have to clear those in rcu_free. 677 */ 678 static void radix_tree_free_nodes(struct radix_tree_node *node) 679 { 680 unsigned offset = 0; 681 struct radix_tree_node *child = entry_to_node(node); 682 683 for (;;) { 684 void *entry = rcu_dereference_raw(child->slots[offset]); 685 if (xa_is_node(entry) && child->shift) { 686 child = entry_to_node(entry); 687 offset = 0; 688 continue; 689 } 690 offset++; 691 while (offset == RADIX_TREE_MAP_SIZE) { 692 struct radix_tree_node *old = child; 693 offset = child->offset + 1; 694 child = child->parent; 695 WARN_ON_ONCE(!list_empty(&old->private_list)); 696 radix_tree_node_free(old); 697 if (old == entry_to_node(node)) 698 return; 699 } 700 } 701 } 702 703 static inline int insert_entries(struct radix_tree_node *node, 704 void __rcu **slot, void *item, bool replace) 705 { 706 if (*slot) 707 return -EEXIST; 708 rcu_assign_pointer(*slot, item); 709 if (node) { 710 node->count++; 711 if (xa_is_value(item)) 712 node->nr_values++; 713 } 714 return 1; 715 } 716 717 /** 718 * __radix_tree_insert - insert into a radix tree 719 * @root: radix tree root 720 * @index: index key 721 * @item: item to insert 722 * 723 * Insert an item into the radix tree at position @index. 724 */ 725 int radix_tree_insert(struct radix_tree_root *root, unsigned long index, 726 void *item) 727 { 728 struct radix_tree_node *node; 729 void __rcu **slot; 730 int error; 731 732 BUG_ON(radix_tree_is_internal_node(item)); 733 734 error = __radix_tree_create(root, index, &node, &slot); 735 if (error) 736 return error; 737 738 error = insert_entries(node, slot, item, false); 739 if (error < 0) 740 return error; 741 742 if (node) { 743 unsigned offset = get_slot_offset(node, slot); 744 BUG_ON(tag_get(node, 0, offset)); 745 BUG_ON(tag_get(node, 1, offset)); 746 BUG_ON(tag_get(node, 2, offset)); 747 } else { 748 BUG_ON(root_tags_get(root)); 749 } 750 751 return 0; 752 } 753 EXPORT_SYMBOL(radix_tree_insert); 754 755 /** 756 * __radix_tree_lookup - lookup an item in a radix tree 757 * @root: radix tree root 758 * @index: index key 759 * @nodep: returns node 760 * @slotp: returns slot 761 * 762 * Lookup and return the item at position @index in the radix 763 * tree @root. 764 * 765 * Until there is more than one item in the tree, no nodes are 766 * allocated and @root->xa_head is used as a direct slot instead of 767 * pointing to a node, in which case *@nodep will be NULL. 768 */ 769 void *__radix_tree_lookup(const struct radix_tree_root *root, 770 unsigned long index, struct radix_tree_node **nodep, 771 void __rcu ***slotp) 772 { 773 struct radix_tree_node *node, *parent; 774 unsigned long maxindex; 775 void __rcu **slot; 776 777 restart: 778 parent = NULL; 779 slot = (void __rcu **)&root->xa_head; 780 radix_tree_load_root(root, &node, &maxindex); 781 if (index > maxindex) 782 return NULL; 783 784 while (radix_tree_is_internal_node(node)) { 785 unsigned offset; 786 787 if (node == RADIX_TREE_RETRY) 788 goto restart; 789 parent = entry_to_node(node); 790 offset = radix_tree_descend(parent, &node, index); 791 slot = parent->slots + offset; 792 if (parent->shift == 0) 793 break; 794 } 795 796 if (nodep) 797 *nodep = parent; 798 if (slotp) 799 *slotp = slot; 800 return node; 801 } 802 803 /** 804 * radix_tree_lookup_slot - lookup a slot in a radix tree 805 * @root: radix tree root 806 * @index: index key 807 * 808 * Returns: the slot corresponding to the position @index in the 809 * radix tree @root. This is useful for update-if-exists operations. 810 * 811 * This function can be called under rcu_read_lock iff the slot is not 812 * modified by radix_tree_replace_slot, otherwise it must be called 813 * exclusive from other writers. Any dereference of the slot must be done 814 * using radix_tree_deref_slot. 815 */ 816 void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, 817 unsigned long index) 818 { 819 void __rcu **slot; 820 821 if (!__radix_tree_lookup(root, index, NULL, &slot)) 822 return NULL; 823 return slot; 824 } 825 EXPORT_SYMBOL(radix_tree_lookup_slot); 826 827 /** 828 * radix_tree_lookup - perform lookup operation on a radix tree 829 * @root: radix tree root 830 * @index: index key 831 * 832 * Lookup the item at the position @index in the radix tree @root. 833 * 834 * This function can be called under rcu_read_lock, however the caller 835 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 836 * them safely). No RCU barriers are required to access or modify the 837 * returned item, however. 838 */ 839 void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) 840 { 841 return __radix_tree_lookup(root, index, NULL, NULL); 842 } 843 EXPORT_SYMBOL(radix_tree_lookup); 844 845 static void replace_slot(void __rcu **slot, void *item, 846 struct radix_tree_node *node, int count, int values) 847 { 848 if (node && (count || values)) { 849 node->count += count; 850 node->nr_values += values; 851 } 852 853 rcu_assign_pointer(*slot, item); 854 } 855 856 static bool node_tag_get(const struct radix_tree_root *root, 857 const struct radix_tree_node *node, 858 unsigned int tag, unsigned int offset) 859 { 860 if (node) 861 return tag_get(node, tag, offset); 862 return root_tag_get(root, tag); 863 } 864 865 /* 866 * IDR users want to be able to store NULL in the tree, so if the slot isn't 867 * free, don't adjust the count, even if it's transitioning between NULL and 868 * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still 869 * have empty bits, but it only stores NULL in slots when they're being 870 * deleted. 871 */ 872 static int calculate_count(struct radix_tree_root *root, 873 struct radix_tree_node *node, void __rcu **slot, 874 void *item, void *old) 875 { 876 if (is_idr(root)) { 877 unsigned offset = get_slot_offset(node, slot); 878 bool free = node_tag_get(root, node, IDR_FREE, offset); 879 if (!free) 880 return 0; 881 if (!old) 882 return 1; 883 } 884 return !!item - !!old; 885 } 886 887 /** 888 * __radix_tree_replace - replace item in a slot 889 * @root: radix tree root 890 * @node: pointer to tree node 891 * @slot: pointer to slot in @node 892 * @item: new item to store in the slot. 893 * 894 * For use with __radix_tree_lookup(). Caller must hold tree write locked 895 * across slot lookup and replacement. 896 */ 897 void __radix_tree_replace(struct radix_tree_root *root, 898 struct radix_tree_node *node, 899 void __rcu **slot, void *item) 900 { 901 void *old = rcu_dereference_raw(*slot); 902 int values = !!xa_is_value(item) - !!xa_is_value(old); 903 int count = calculate_count(root, node, slot, item, old); 904 905 /* 906 * This function supports replacing value entries and 907 * deleting entries, but that needs accounting against the 908 * node unless the slot is root->xa_head. 909 */ 910 WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && 911 (count || values)); 912 replace_slot(slot, item, node, count, values); 913 914 if (!node) 915 return; 916 917 delete_node(root, node); 918 } 919 920 /** 921 * radix_tree_replace_slot - replace item in a slot 922 * @root: radix tree root 923 * @slot: pointer to slot 924 * @item: new item to store in the slot. 925 * 926 * For use with radix_tree_lookup_slot() and 927 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked 928 * across slot lookup and replacement. 929 * 930 * NOTE: This cannot be used to switch between non-entries (empty slots), 931 * regular entries, and value entries, as that requires accounting 932 * inside the radix tree node. When switching from one type of entry or 933 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or 934 * radix_tree_iter_replace(). 935 */ 936 void radix_tree_replace_slot(struct radix_tree_root *root, 937 void __rcu **slot, void *item) 938 { 939 __radix_tree_replace(root, NULL, slot, item); 940 } 941 EXPORT_SYMBOL(radix_tree_replace_slot); 942 943 /** 944 * radix_tree_iter_replace - replace item in a slot 945 * @root: radix tree root 946 * @slot: pointer to slot 947 * @item: new item to store in the slot. 948 * 949 * For use with radix_tree_for_each_slot(). 950 * Caller must hold tree write locked. 951 */ 952 void radix_tree_iter_replace(struct radix_tree_root *root, 953 const struct radix_tree_iter *iter, 954 void __rcu **slot, void *item) 955 { 956 __radix_tree_replace(root, iter->node, slot, item); 957 } 958 959 static void node_tag_set(struct radix_tree_root *root, 960 struct radix_tree_node *node, 961 unsigned int tag, unsigned int offset) 962 { 963 while (node) { 964 if (tag_get(node, tag, offset)) 965 return; 966 tag_set(node, tag, offset); 967 offset = node->offset; 968 node = node->parent; 969 } 970 971 if (!root_tag_get(root, tag)) 972 root_tag_set(root, tag); 973 } 974 975 /** 976 * radix_tree_tag_set - set a tag on a radix tree node 977 * @root: radix tree root 978 * @index: index key 979 * @tag: tag index 980 * 981 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 982 * corresponding to @index in the radix tree. From 983 * the root all the way down to the leaf node. 984 * 985 * Returns the address of the tagged item. Setting a tag on a not-present 986 * item is a bug. 987 */ 988 void *radix_tree_tag_set(struct radix_tree_root *root, 989 unsigned long index, unsigned int tag) 990 { 991 struct radix_tree_node *node, *parent; 992 unsigned long maxindex; 993 994 radix_tree_load_root(root, &node, &maxindex); 995 BUG_ON(index > maxindex); 996 997 while (radix_tree_is_internal_node(node)) { 998 unsigned offset; 999 1000 parent = entry_to_node(node); 1001 offset = radix_tree_descend(parent, &node, index); 1002 BUG_ON(!node); 1003 1004 if (!tag_get(parent, tag, offset)) 1005 tag_set(parent, tag, offset); 1006 } 1007 1008 /* set the root's tag bit */ 1009 if (!root_tag_get(root, tag)) 1010 root_tag_set(root, tag); 1011 1012 return node; 1013 } 1014 EXPORT_SYMBOL(radix_tree_tag_set); 1015 1016 static void node_tag_clear(struct radix_tree_root *root, 1017 struct radix_tree_node *node, 1018 unsigned int tag, unsigned int offset) 1019 { 1020 while (node) { 1021 if (!tag_get(node, tag, offset)) 1022 return; 1023 tag_clear(node, tag, offset); 1024 if (any_tag_set(node, tag)) 1025 return; 1026 1027 offset = node->offset; 1028 node = node->parent; 1029 } 1030 1031 /* clear the root's tag bit */ 1032 if (root_tag_get(root, tag)) 1033 root_tag_clear(root, tag); 1034 } 1035 1036 /** 1037 * radix_tree_tag_clear - clear a tag on a radix tree node 1038 * @root: radix tree root 1039 * @index: index key 1040 * @tag: tag index 1041 * 1042 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 1043 * corresponding to @index in the radix tree. If this causes 1044 * the leaf node to have no tags set then clear the tag in the 1045 * next-to-leaf node, etc. 1046 * 1047 * Returns the address of the tagged item on success, else NULL. ie: 1048 * has the same return value and semantics as radix_tree_lookup(). 1049 */ 1050 void *radix_tree_tag_clear(struct radix_tree_root *root, 1051 unsigned long index, unsigned int tag) 1052 { 1053 struct radix_tree_node *node, *parent; 1054 unsigned long maxindex; 1055 int uninitialized_var(offset); 1056 1057 radix_tree_load_root(root, &node, &maxindex); 1058 if (index > maxindex) 1059 return NULL; 1060 1061 parent = NULL; 1062 1063 while (radix_tree_is_internal_node(node)) { 1064 parent = entry_to_node(node); 1065 offset = radix_tree_descend(parent, &node, index); 1066 } 1067 1068 if (node) 1069 node_tag_clear(root, parent, tag, offset); 1070 1071 return node; 1072 } 1073 EXPORT_SYMBOL(radix_tree_tag_clear); 1074 1075 /** 1076 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry 1077 * @root: radix tree root 1078 * @iter: iterator state 1079 * @tag: tag to clear 1080 */ 1081 void radix_tree_iter_tag_clear(struct radix_tree_root *root, 1082 const struct radix_tree_iter *iter, unsigned int tag) 1083 { 1084 node_tag_clear(root, iter->node, tag, iter_offset(iter)); 1085 } 1086 1087 /** 1088 * radix_tree_tag_get - get a tag on a radix tree node 1089 * @root: radix tree root 1090 * @index: index key 1091 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 1092 * 1093 * Return values: 1094 * 1095 * 0: tag not present or not set 1096 * 1: tag set 1097 * 1098 * Note that the return value of this function may not be relied on, even if 1099 * the RCU lock is held, unless tag modification and node deletion are excluded 1100 * from concurrency. 1101 */ 1102 int radix_tree_tag_get(const struct radix_tree_root *root, 1103 unsigned long index, unsigned int tag) 1104 { 1105 struct radix_tree_node *node, *parent; 1106 unsigned long maxindex; 1107 1108 if (!root_tag_get(root, tag)) 1109 return 0; 1110 1111 radix_tree_load_root(root, &node, &maxindex); 1112 if (index > maxindex) 1113 return 0; 1114 1115 while (radix_tree_is_internal_node(node)) { 1116 unsigned offset; 1117 1118 parent = entry_to_node(node); 1119 offset = radix_tree_descend(parent, &node, index); 1120 1121 if (!tag_get(parent, tag, offset)) 1122 return 0; 1123 if (node == RADIX_TREE_RETRY) 1124 break; 1125 } 1126 1127 return 1; 1128 } 1129 EXPORT_SYMBOL(radix_tree_tag_get); 1130 1131 /* Construct iter->tags bit-mask from node->tags[tag] array */ 1132 static void set_iter_tags(struct radix_tree_iter *iter, 1133 struct radix_tree_node *node, unsigned offset, 1134 unsigned tag) 1135 { 1136 unsigned tag_long = offset / BITS_PER_LONG; 1137 unsigned tag_bit = offset % BITS_PER_LONG; 1138 1139 if (!node) { 1140 iter->tags = 1; 1141 return; 1142 } 1143 1144 iter->tags = node->tags[tag][tag_long] >> tag_bit; 1145 1146 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ 1147 if (tag_long < RADIX_TREE_TAG_LONGS - 1) { 1148 /* Pick tags from next element */ 1149 if (tag_bit) 1150 iter->tags |= node->tags[tag][tag_long + 1] << 1151 (BITS_PER_LONG - tag_bit); 1152 /* Clip chunk size, here only BITS_PER_LONG tags */ 1153 iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); 1154 } 1155 } 1156 1157 void __rcu **radix_tree_iter_resume(void __rcu **slot, 1158 struct radix_tree_iter *iter) 1159 { 1160 slot++; 1161 iter->index = __radix_tree_iter_add(iter, 1); 1162 iter->next_index = iter->index; 1163 iter->tags = 0; 1164 return NULL; 1165 } 1166 EXPORT_SYMBOL(radix_tree_iter_resume); 1167 1168 /** 1169 * radix_tree_next_chunk - find next chunk of slots for iteration 1170 * 1171 * @root: radix tree root 1172 * @iter: iterator state 1173 * @flags: RADIX_TREE_ITER_* flags and tag index 1174 * Returns: pointer to chunk first slot, or NULL if iteration is over 1175 */ 1176 void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, 1177 struct radix_tree_iter *iter, unsigned flags) 1178 { 1179 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; 1180 struct radix_tree_node *node, *child; 1181 unsigned long index, offset, maxindex; 1182 1183 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) 1184 return NULL; 1185 1186 /* 1187 * Catch next_index overflow after ~0UL. iter->index never overflows 1188 * during iterating; it can be zero only at the beginning. 1189 * And we cannot overflow iter->next_index in a single step, 1190 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 1191 * 1192 * This condition also used by radix_tree_next_slot() to stop 1193 * contiguous iterating, and forbid switching to the next chunk. 1194 */ 1195 index = iter->next_index; 1196 if (!index && iter->index) 1197 return NULL; 1198 1199 restart: 1200 radix_tree_load_root(root, &child, &maxindex); 1201 if (index > maxindex) 1202 return NULL; 1203 if (!child) 1204 return NULL; 1205 1206 if (!radix_tree_is_internal_node(child)) { 1207 /* Single-slot tree */ 1208 iter->index = index; 1209 iter->next_index = maxindex + 1; 1210 iter->tags = 1; 1211 iter->node = NULL; 1212 return (void __rcu **)&root->xa_head; 1213 } 1214 1215 do { 1216 node = entry_to_node(child); 1217 offset = radix_tree_descend(node, &child, index); 1218 1219 if ((flags & RADIX_TREE_ITER_TAGGED) ? 1220 !tag_get(node, tag, offset) : !child) { 1221 /* Hole detected */ 1222 if (flags & RADIX_TREE_ITER_CONTIG) 1223 return NULL; 1224 1225 if (flags & RADIX_TREE_ITER_TAGGED) 1226 offset = radix_tree_find_next_bit(node, tag, 1227 offset + 1); 1228 else 1229 while (++offset < RADIX_TREE_MAP_SIZE) { 1230 void *slot = rcu_dereference_raw( 1231 node->slots[offset]); 1232 if (slot) 1233 break; 1234 } 1235 index &= ~node_maxindex(node); 1236 index += offset << node->shift; 1237 /* Overflow after ~0UL */ 1238 if (!index) 1239 return NULL; 1240 if (offset == RADIX_TREE_MAP_SIZE) 1241 goto restart; 1242 child = rcu_dereference_raw(node->slots[offset]); 1243 } 1244 1245 if (!child) 1246 goto restart; 1247 if (child == RADIX_TREE_RETRY) 1248 break; 1249 } while (node->shift && radix_tree_is_internal_node(child)); 1250 1251 /* Update the iterator state */ 1252 iter->index = (index &~ node_maxindex(node)) | offset; 1253 iter->next_index = (index | node_maxindex(node)) + 1; 1254 iter->node = node; 1255 1256 if (flags & RADIX_TREE_ITER_TAGGED) 1257 set_iter_tags(iter, node, offset, tag); 1258 1259 return node->slots + offset; 1260 } 1261 EXPORT_SYMBOL(radix_tree_next_chunk); 1262 1263 /** 1264 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 1265 * @root: radix tree root 1266 * @results: where the results of the lookup are placed 1267 * @first_index: start the lookup from this key 1268 * @max_items: place up to this many items at *results 1269 * 1270 * Performs an index-ascending scan of the tree for present items. Places 1271 * them at *@results and returns the number of items which were placed at 1272 * *@results. 1273 * 1274 * The implementation is naive. 1275 * 1276 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 1277 * rcu_read_lock. In this case, rather than the returned results being 1278 * an atomic snapshot of the tree at a single point in time, the 1279 * semantics of an RCU protected gang lookup are as though multiple 1280 * radix_tree_lookups have been issued in individual locks, and results 1281 * stored in 'results'. 1282 */ 1283 unsigned int 1284 radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, 1285 unsigned long first_index, unsigned int max_items) 1286 { 1287 struct radix_tree_iter iter; 1288 void __rcu **slot; 1289 unsigned int ret = 0; 1290 1291 if (unlikely(!max_items)) 1292 return 0; 1293 1294 radix_tree_for_each_slot(slot, root, &iter, first_index) { 1295 results[ret] = rcu_dereference_raw(*slot); 1296 if (!results[ret]) 1297 continue; 1298 if (radix_tree_is_internal_node(results[ret])) { 1299 slot = radix_tree_iter_retry(&iter); 1300 continue; 1301 } 1302 if (++ret == max_items) 1303 break; 1304 } 1305 1306 return ret; 1307 } 1308 EXPORT_SYMBOL(radix_tree_gang_lookup); 1309 1310 /** 1311 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 1312 * based on a tag 1313 * @root: radix tree root 1314 * @results: where the results of the lookup are placed 1315 * @first_index: start the lookup from this key 1316 * @max_items: place up to this many items at *results 1317 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1318 * 1319 * Performs an index-ascending scan of the tree for present items which 1320 * have the tag indexed by @tag set. Places the items at *@results and 1321 * returns the number of items which were placed at *@results. 1322 */ 1323 unsigned int 1324 radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, 1325 unsigned long first_index, unsigned int max_items, 1326 unsigned int tag) 1327 { 1328 struct radix_tree_iter iter; 1329 void __rcu **slot; 1330 unsigned int ret = 0; 1331 1332 if (unlikely(!max_items)) 1333 return 0; 1334 1335 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1336 results[ret] = rcu_dereference_raw(*slot); 1337 if (!results[ret]) 1338 continue; 1339 if (radix_tree_is_internal_node(results[ret])) { 1340 slot = radix_tree_iter_retry(&iter); 1341 continue; 1342 } 1343 if (++ret == max_items) 1344 break; 1345 } 1346 1347 return ret; 1348 } 1349 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 1350 1351 /** 1352 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 1353 * radix tree based on a tag 1354 * @root: radix tree root 1355 * @results: where the results of the lookup are placed 1356 * @first_index: start the lookup from this key 1357 * @max_items: place up to this many items at *results 1358 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1359 * 1360 * Performs an index-ascending scan of the tree for present items which 1361 * have the tag indexed by @tag set. Places the slots at *@results and 1362 * returns the number of slots which were placed at *@results. 1363 */ 1364 unsigned int 1365 radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, 1366 void __rcu ***results, unsigned long first_index, 1367 unsigned int max_items, unsigned int tag) 1368 { 1369 struct radix_tree_iter iter; 1370 void __rcu **slot; 1371 unsigned int ret = 0; 1372 1373 if (unlikely(!max_items)) 1374 return 0; 1375 1376 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1377 results[ret] = slot; 1378 if (++ret == max_items) 1379 break; 1380 } 1381 1382 return ret; 1383 } 1384 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1385 1386 static bool __radix_tree_delete(struct radix_tree_root *root, 1387 struct radix_tree_node *node, void __rcu **slot) 1388 { 1389 void *old = rcu_dereference_raw(*slot); 1390 int values = xa_is_value(old) ? -1 : 0; 1391 unsigned offset = get_slot_offset(node, slot); 1392 int tag; 1393 1394 if (is_idr(root)) 1395 node_tag_set(root, node, IDR_FREE, offset); 1396 else 1397 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1398 node_tag_clear(root, node, tag, offset); 1399 1400 replace_slot(slot, NULL, node, -1, values); 1401 return node && delete_node(root, node); 1402 } 1403 1404 /** 1405 * radix_tree_iter_delete - delete the entry at this iterator position 1406 * @root: radix tree root 1407 * @iter: iterator state 1408 * @slot: pointer to slot 1409 * 1410 * Delete the entry at the position currently pointed to by the iterator. 1411 * This may result in the current node being freed; if it is, the iterator 1412 * is advanced so that it will not reference the freed memory. This 1413 * function may be called without any locking if there are no other threads 1414 * which can access this tree. 1415 */ 1416 void radix_tree_iter_delete(struct radix_tree_root *root, 1417 struct radix_tree_iter *iter, void __rcu **slot) 1418 { 1419 if (__radix_tree_delete(root, iter->node, slot)) 1420 iter->index = iter->next_index; 1421 } 1422 EXPORT_SYMBOL(radix_tree_iter_delete); 1423 1424 /** 1425 * radix_tree_delete_item - delete an item from a radix tree 1426 * @root: radix tree root 1427 * @index: index key 1428 * @item: expected item 1429 * 1430 * Remove @item at @index from the radix tree rooted at @root. 1431 * 1432 * Return: the deleted entry, or %NULL if it was not present 1433 * or the entry at the given @index was not @item. 1434 */ 1435 void *radix_tree_delete_item(struct radix_tree_root *root, 1436 unsigned long index, void *item) 1437 { 1438 struct radix_tree_node *node = NULL; 1439 void __rcu **slot = NULL; 1440 void *entry; 1441 1442 entry = __radix_tree_lookup(root, index, &node, &slot); 1443 if (!slot) 1444 return NULL; 1445 if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, 1446 get_slot_offset(node, slot)))) 1447 return NULL; 1448 1449 if (item && entry != item) 1450 return NULL; 1451 1452 __radix_tree_delete(root, node, slot); 1453 1454 return entry; 1455 } 1456 EXPORT_SYMBOL(radix_tree_delete_item); 1457 1458 /** 1459 * radix_tree_delete - delete an entry from a radix tree 1460 * @root: radix tree root 1461 * @index: index key 1462 * 1463 * Remove the entry at @index from the radix tree rooted at @root. 1464 * 1465 * Return: The deleted entry, or %NULL if it was not present. 1466 */ 1467 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1468 { 1469 return radix_tree_delete_item(root, index, NULL); 1470 } 1471 EXPORT_SYMBOL(radix_tree_delete); 1472 1473 /** 1474 * radix_tree_tagged - test whether any items in the tree are tagged 1475 * @root: radix tree root 1476 * @tag: tag to test 1477 */ 1478 int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) 1479 { 1480 return root_tag_get(root, tag); 1481 } 1482 EXPORT_SYMBOL(radix_tree_tagged); 1483 1484 /** 1485 * idr_preload - preload for idr_alloc() 1486 * @gfp_mask: allocation mask to use for preloading 1487 * 1488 * Preallocate memory to use for the next call to idr_alloc(). This function 1489 * returns with preemption disabled. It will be enabled by idr_preload_end(). 1490 */ 1491 void idr_preload(gfp_t gfp_mask) 1492 { 1493 if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) 1494 preempt_disable(); 1495 } 1496 EXPORT_SYMBOL(idr_preload); 1497 1498 void __rcu **idr_get_free(struct radix_tree_root *root, 1499 struct radix_tree_iter *iter, gfp_t gfp, 1500 unsigned long max) 1501 { 1502 struct radix_tree_node *node = NULL, *child; 1503 void __rcu **slot = (void __rcu **)&root->xa_head; 1504 unsigned long maxindex, start = iter->next_index; 1505 unsigned int shift, offset = 0; 1506 1507 grow: 1508 shift = radix_tree_load_root(root, &child, &maxindex); 1509 if (!radix_tree_tagged(root, IDR_FREE)) 1510 start = max(start, maxindex + 1); 1511 if (start > max) 1512 return ERR_PTR(-ENOSPC); 1513 1514 if (start > maxindex) { 1515 int error = radix_tree_extend(root, gfp, start, shift); 1516 if (error < 0) 1517 return ERR_PTR(error); 1518 shift = error; 1519 child = rcu_dereference_raw(root->xa_head); 1520 } 1521 if (start == 0 && shift == 0) 1522 shift = RADIX_TREE_MAP_SHIFT; 1523 1524 while (shift) { 1525 shift -= RADIX_TREE_MAP_SHIFT; 1526 if (child == NULL) { 1527 /* Have to add a child node. */ 1528 child = radix_tree_node_alloc(gfp, node, root, shift, 1529 offset, 0, 0); 1530 if (!child) 1531 return ERR_PTR(-ENOMEM); 1532 all_tag_set(child, IDR_FREE); 1533 rcu_assign_pointer(*slot, node_to_entry(child)); 1534 if (node) 1535 node->count++; 1536 } else if (!radix_tree_is_internal_node(child)) 1537 break; 1538 1539 node = entry_to_node(child); 1540 offset = radix_tree_descend(node, &child, start); 1541 if (!tag_get(node, IDR_FREE, offset)) { 1542 offset = radix_tree_find_next_bit(node, IDR_FREE, 1543 offset + 1); 1544 start = next_index(start, node, offset); 1545 if (start > max) 1546 return ERR_PTR(-ENOSPC); 1547 while (offset == RADIX_TREE_MAP_SIZE) { 1548 offset = node->offset + 1; 1549 node = node->parent; 1550 if (!node) 1551 goto grow; 1552 shift = node->shift; 1553 } 1554 child = rcu_dereference_raw(node->slots[offset]); 1555 } 1556 slot = &node->slots[offset]; 1557 } 1558 1559 iter->index = start; 1560 if (node) 1561 iter->next_index = 1 + min(max, (start | node_maxindex(node))); 1562 else 1563 iter->next_index = 1; 1564 iter->node = node; 1565 set_iter_tags(iter, node, offset, IDR_FREE); 1566 1567 return slot; 1568 } 1569 1570 /** 1571 * idr_destroy - release all internal memory from an IDR 1572 * @idr: idr handle 1573 * 1574 * After this function is called, the IDR is empty, and may be reused or 1575 * the data structure containing it may be freed. 1576 * 1577 * A typical clean-up sequence for objects stored in an idr tree will use 1578 * idr_for_each() to free all objects, if necessary, then idr_destroy() to 1579 * free the memory used to keep track of those objects. 1580 */ 1581 void idr_destroy(struct idr *idr) 1582 { 1583 struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); 1584 if (radix_tree_is_internal_node(node)) 1585 radix_tree_free_nodes(node); 1586 idr->idr_rt.xa_head = NULL; 1587 root_tag_set(&idr->idr_rt, IDR_FREE); 1588 } 1589 EXPORT_SYMBOL(idr_destroy); 1590 1591 static void 1592 radix_tree_node_ctor(void *arg) 1593 { 1594 struct radix_tree_node *node = arg; 1595 1596 memset(node, 0, sizeof(*node)); 1597 INIT_LIST_HEAD(&node->private_list); 1598 } 1599 1600 static int radix_tree_cpu_dead(unsigned int cpu) 1601 { 1602 struct radix_tree_preload *rtp; 1603 struct radix_tree_node *node; 1604 1605 /* Free per-cpu pool of preloaded nodes */ 1606 rtp = &per_cpu(radix_tree_preloads, cpu); 1607 while (rtp->nr) { 1608 node = rtp->nodes; 1609 rtp->nodes = node->parent; 1610 kmem_cache_free(radix_tree_node_cachep, node); 1611 rtp->nr--; 1612 } 1613 return 0; 1614 } 1615 1616 void __init radix_tree_init(void) 1617 { 1618 int ret; 1619 1620 BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); 1621 BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); 1622 BUILD_BUG_ON(XA_CHUNK_SIZE > 255); 1623 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1624 sizeof(struct radix_tree_node), 0, 1625 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1626 radix_tree_node_ctor); 1627 ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", 1628 NULL, radix_tree_cpu_dead); 1629 WARN_ON(ret < 0); 1630 } 1631