1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter 5 * Copyright (C) 2006 Nick Piggin 6 * Copyright (C) 2012 Konstantin Khlebnikov 7 * Copyright (C) 2016 Intel, Matthew Wilcox 8 * Copyright (C) 2016 Intel, Ross Zwisler 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License as 12 * published by the Free Software Foundation; either version 2, or (at 13 * your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, but 16 * WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 23 */ 24 25 #include <linux/bitmap.h> 26 #include <linux/bitops.h> 27 #include <linux/bug.h> 28 #include <linux/cpu.h> 29 #include <linux/errno.h> 30 #include <linux/export.h> 31 #include <linux/idr.h> 32 #include <linux/init.h> 33 #include <linux/kernel.h> 34 #include <linux/kmemleak.h> 35 #include <linux/percpu.h> 36 #include <linux/preempt.h> /* in_interrupt() */ 37 #include <linux/radix-tree.h> 38 #include <linux/rcupdate.h> 39 #include <linux/slab.h> 40 #include <linux/string.h> 41 42 43 /* Number of nodes in fully populated tree of given height */ 44 static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; 45 46 /* 47 * Radix tree node cache. 48 */ 49 static struct kmem_cache *radix_tree_node_cachep; 50 51 /* 52 * The radix tree is variable-height, so an insert operation not only has 53 * to build the branch to its corresponding item, it also has to build the 54 * branch to existing items if the size has to be increased (by 55 * radix_tree_extend). 56 * 57 * The worst case is a zero height tree with just a single item at index 0, 58 * and then inserting an item at index ULONG_MAX. This requires 2 new branches 59 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. 60 * Hence: 61 */ 62 #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) 63 64 /* 65 * The IDR does not have to be as high as the radix tree since it uses 66 * signed integers, not unsigned longs. 67 */ 68 #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) 69 #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ 70 RADIX_TREE_MAP_SHIFT)) 71 #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) 72 73 /* 74 * The IDA is even shorter since it uses a bitmap at the last level. 75 */ 76 #define IDA_INDEX_BITS (8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS)) 77 #define IDA_MAX_PATH (DIV_ROUND_UP(IDA_INDEX_BITS, \ 78 RADIX_TREE_MAP_SHIFT)) 79 #define IDA_PRELOAD_SIZE (IDA_MAX_PATH * 2 - 1) 80 81 /* 82 * Per-cpu pool of preloaded nodes 83 */ 84 struct radix_tree_preload { 85 unsigned nr; 86 /* nodes->parent points to next preallocated node */ 87 struct radix_tree_node *nodes; 88 }; 89 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 90 91 static inline struct radix_tree_node *entry_to_node(void *ptr) 92 { 93 return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); 94 } 95 96 static inline void *node_to_entry(void *ptr) 97 { 98 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 99 } 100 101 #define RADIX_TREE_RETRY node_to_entry(NULL) 102 103 #ifdef CONFIG_RADIX_TREE_MULTIORDER 104 /* Sibling slots point directly to another slot in the same node */ 105 static inline 106 bool is_sibling_entry(const struct radix_tree_node *parent, void *node) 107 { 108 void __rcu **ptr = node; 109 return (parent->slots <= ptr) && 110 (ptr < parent->slots + RADIX_TREE_MAP_SIZE); 111 } 112 #else 113 static inline 114 bool is_sibling_entry(const struct radix_tree_node *parent, void *node) 115 { 116 return false; 117 } 118 #endif 119 120 static inline unsigned long 121 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) 122 { 123 return parent ? slot - parent->slots : 0; 124 } 125 126 static unsigned int radix_tree_descend(const struct radix_tree_node *parent, 127 struct radix_tree_node **nodep, unsigned long index) 128 { 129 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; 130 void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); 131 132 #ifdef CONFIG_RADIX_TREE_MULTIORDER 133 if (radix_tree_is_internal_node(entry)) { 134 if (is_sibling_entry(parent, entry)) { 135 void __rcu **sibentry; 136 sibentry = (void __rcu **) entry_to_node(entry); 137 offset = get_slot_offset(parent, sibentry); 138 entry = rcu_dereference_raw(*sibentry); 139 } 140 } 141 #endif 142 143 *nodep = (void *)entry; 144 return offset; 145 } 146 147 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) 148 { 149 return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK); 150 } 151 152 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 153 int offset) 154 { 155 __set_bit(offset, node->tags[tag]); 156 } 157 158 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 159 int offset) 160 { 161 __clear_bit(offset, node->tags[tag]); 162 } 163 164 static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, 165 int offset) 166 { 167 return test_bit(offset, node->tags[tag]); 168 } 169 170 static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) 171 { 172 root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); 173 } 174 175 static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) 176 { 177 root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); 178 } 179 180 static inline void root_tag_clear_all(struct radix_tree_root *root) 181 { 182 root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1; 183 } 184 185 static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) 186 { 187 return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT)); 188 } 189 190 static inline unsigned root_tags_get(const struct radix_tree_root *root) 191 { 192 return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT; 193 } 194 195 static inline bool is_idr(const struct radix_tree_root *root) 196 { 197 return !!(root->gfp_mask & ROOT_IS_IDR); 198 } 199 200 /* 201 * Returns 1 if any slot in the node has this tag set. 202 * Otherwise returns 0. 203 */ 204 static inline int any_tag_set(const struct radix_tree_node *node, 205 unsigned int tag) 206 { 207 unsigned idx; 208 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 209 if (node->tags[tag][idx]) 210 return 1; 211 } 212 return 0; 213 } 214 215 static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) 216 { 217 bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); 218 } 219 220 /** 221 * radix_tree_find_next_bit - find the next set bit in a memory region 222 * 223 * @addr: The address to base the search on 224 * @size: The bitmap size in bits 225 * @offset: The bitnumber to start searching at 226 * 227 * Unrollable variant of find_next_bit() for constant size arrays. 228 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. 229 * Returns next bit offset, or size if nothing found. 230 */ 231 static __always_inline unsigned long 232 radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, 233 unsigned long offset) 234 { 235 const unsigned long *addr = node->tags[tag]; 236 237 if (offset < RADIX_TREE_MAP_SIZE) { 238 unsigned long tmp; 239 240 addr += offset / BITS_PER_LONG; 241 tmp = *addr >> (offset % BITS_PER_LONG); 242 if (tmp) 243 return __ffs(tmp) + offset; 244 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); 245 while (offset < RADIX_TREE_MAP_SIZE) { 246 tmp = *++addr; 247 if (tmp) 248 return __ffs(tmp) + offset; 249 offset += BITS_PER_LONG; 250 } 251 } 252 return RADIX_TREE_MAP_SIZE; 253 } 254 255 static unsigned int iter_offset(const struct radix_tree_iter *iter) 256 { 257 return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK; 258 } 259 260 /* 261 * The maximum index which can be stored in a radix tree 262 */ 263 static inline unsigned long shift_maxindex(unsigned int shift) 264 { 265 return (RADIX_TREE_MAP_SIZE << shift) - 1; 266 } 267 268 static inline unsigned long node_maxindex(const struct radix_tree_node *node) 269 { 270 return shift_maxindex(node->shift); 271 } 272 273 static unsigned long next_index(unsigned long index, 274 const struct radix_tree_node *node, 275 unsigned long offset) 276 { 277 return (index & ~node_maxindex(node)) + (offset << node->shift); 278 } 279 280 #ifndef __KERNEL__ 281 static void dump_node(struct radix_tree_node *node, unsigned long index) 282 { 283 unsigned long i; 284 285 pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n", 286 node, node->offset, index, index | node_maxindex(node), 287 node->parent, 288 node->tags[0][0], node->tags[1][0], node->tags[2][0], 289 node->shift, node->count, node->exceptional); 290 291 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { 292 unsigned long first = index | (i << node->shift); 293 unsigned long last = first | ((1UL << node->shift) - 1); 294 void *entry = node->slots[i]; 295 if (!entry) 296 continue; 297 if (entry == RADIX_TREE_RETRY) { 298 pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n", 299 i, first, last, node); 300 } else if (!radix_tree_is_internal_node(entry)) { 301 pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", 302 entry, i, first, last, node); 303 } else if (is_sibling_entry(node, entry)) { 304 pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", 305 entry, i, first, last, node, 306 *(void **)entry_to_node(entry)); 307 } else { 308 dump_node(entry_to_node(entry), first); 309 } 310 } 311 } 312 313 /* For debug */ 314 static void radix_tree_dump(struct radix_tree_root *root) 315 { 316 pr_debug("radix root: %p rnode %p tags %x\n", 317 root, root->rnode, 318 root->gfp_mask >> ROOT_TAG_SHIFT); 319 if (!radix_tree_is_internal_node(root->rnode)) 320 return; 321 dump_node(entry_to_node(root->rnode), 0); 322 } 323 324 static void dump_ida_node(void *entry, unsigned long index) 325 { 326 unsigned long i; 327 328 if (!entry) 329 return; 330 331 if (radix_tree_is_internal_node(entry)) { 332 struct radix_tree_node *node = entry_to_node(entry); 333 334 pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n", 335 node, node->offset, index * IDA_BITMAP_BITS, 336 ((index | node_maxindex(node)) + 1) * 337 IDA_BITMAP_BITS - 1, 338 node->parent, node->tags[0][0], node->shift, 339 node->count); 340 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) 341 dump_ida_node(node->slots[i], 342 index | (i << node->shift)); 343 } else if (radix_tree_exceptional_entry(entry)) { 344 pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", 345 entry, (int)(index & RADIX_TREE_MAP_MASK), 346 index * IDA_BITMAP_BITS, 347 index * IDA_BITMAP_BITS + BITS_PER_LONG - 348 RADIX_TREE_EXCEPTIONAL_SHIFT, 349 (unsigned long)entry >> 350 RADIX_TREE_EXCEPTIONAL_SHIFT); 351 } else { 352 struct ida_bitmap *bitmap = entry; 353 354 pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap, 355 (int)(index & RADIX_TREE_MAP_MASK), 356 index * IDA_BITMAP_BITS, 357 (index + 1) * IDA_BITMAP_BITS - 1); 358 for (i = 0; i < IDA_BITMAP_LONGS; i++) 359 pr_cont(" %lx", bitmap->bitmap[i]); 360 pr_cont("\n"); 361 } 362 } 363 364 static void ida_dump(struct ida *ida) 365 { 366 struct radix_tree_root *root = &ida->ida_rt; 367 pr_debug("ida: %p node %p free %d\n", ida, root->rnode, 368 root->gfp_mask >> ROOT_TAG_SHIFT); 369 dump_ida_node(root->rnode, 0); 370 } 371 #endif 372 373 /* 374 * This assumes that the caller has performed appropriate preallocation, and 375 * that the caller has pinned this thread of control to the current CPU. 376 */ 377 static struct radix_tree_node * 378 radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, 379 struct radix_tree_root *root, 380 unsigned int shift, unsigned int offset, 381 unsigned int count, unsigned int exceptional) 382 { 383 struct radix_tree_node *ret = NULL; 384 385 /* 386 * Preload code isn't irq safe and it doesn't make sense to use 387 * preloading during an interrupt anyway as all the allocations have 388 * to be atomic. So just do normal allocation when in interrupt. 389 */ 390 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { 391 struct radix_tree_preload *rtp; 392 393 /* 394 * Even if the caller has preloaded, try to allocate from the 395 * cache first for the new node to get accounted to the memory 396 * cgroup. 397 */ 398 ret = kmem_cache_alloc(radix_tree_node_cachep, 399 gfp_mask | __GFP_NOWARN); 400 if (ret) 401 goto out; 402 403 /* 404 * Provided the caller has preloaded here, we will always 405 * succeed in getting a node here (and never reach 406 * kmem_cache_alloc) 407 */ 408 rtp = this_cpu_ptr(&radix_tree_preloads); 409 if (rtp->nr) { 410 ret = rtp->nodes; 411 rtp->nodes = ret->parent; 412 rtp->nr--; 413 } 414 /* 415 * Update the allocation stack trace as this is more useful 416 * for debugging. 417 */ 418 kmemleak_update_trace(ret); 419 goto out; 420 } 421 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 422 out: 423 BUG_ON(radix_tree_is_internal_node(ret)); 424 if (ret) { 425 ret->shift = shift; 426 ret->offset = offset; 427 ret->count = count; 428 ret->exceptional = exceptional; 429 ret->parent = parent; 430 ret->root = root; 431 } 432 return ret; 433 } 434 435 static void radix_tree_node_rcu_free(struct rcu_head *head) 436 { 437 struct radix_tree_node *node = 438 container_of(head, struct radix_tree_node, rcu_head); 439 440 /* 441 * Must only free zeroed nodes into the slab. We can be left with 442 * non-NULL entries by radix_tree_free_nodes, so clear the entries 443 * and tags here. 444 */ 445 memset(node->slots, 0, sizeof(node->slots)); 446 memset(node->tags, 0, sizeof(node->tags)); 447 INIT_LIST_HEAD(&node->private_list); 448 449 kmem_cache_free(radix_tree_node_cachep, node); 450 } 451 452 static inline void 453 radix_tree_node_free(struct radix_tree_node *node) 454 { 455 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 456 } 457 458 /* 459 * Load up this CPU's radix_tree_node buffer with sufficient objects to 460 * ensure that the addition of a single element in the tree cannot fail. On 461 * success, return zero, with preemption disabled. On error, return -ENOMEM 462 * with preemption not disabled. 463 * 464 * To make use of this facility, the radix tree must be initialised without 465 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 466 */ 467 static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) 468 { 469 struct radix_tree_preload *rtp; 470 struct radix_tree_node *node; 471 int ret = -ENOMEM; 472 473 /* 474 * Nodes preloaded by one cgroup can be be used by another cgroup, so 475 * they should never be accounted to any particular memory cgroup. 476 */ 477 gfp_mask &= ~__GFP_ACCOUNT; 478 479 preempt_disable(); 480 rtp = this_cpu_ptr(&radix_tree_preloads); 481 while (rtp->nr < nr) { 482 preempt_enable(); 483 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 484 if (node == NULL) 485 goto out; 486 preempt_disable(); 487 rtp = this_cpu_ptr(&radix_tree_preloads); 488 if (rtp->nr < nr) { 489 node->parent = rtp->nodes; 490 rtp->nodes = node; 491 rtp->nr++; 492 } else { 493 kmem_cache_free(radix_tree_node_cachep, node); 494 } 495 } 496 ret = 0; 497 out: 498 return ret; 499 } 500 501 /* 502 * Load up this CPU's radix_tree_node buffer with sufficient objects to 503 * ensure that the addition of a single element in the tree cannot fail. On 504 * success, return zero, with preemption disabled. On error, return -ENOMEM 505 * with preemption not disabled. 506 * 507 * To make use of this facility, the radix tree must be initialised without 508 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 509 */ 510 int radix_tree_preload(gfp_t gfp_mask) 511 { 512 /* Warn on non-sensical use... */ 513 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); 514 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 515 } 516 EXPORT_SYMBOL(radix_tree_preload); 517 518 /* 519 * The same as above function, except we don't guarantee preloading happens. 520 * We do it, if we decide it helps. On success, return zero with preemption 521 * disabled. On error, return -ENOMEM with preemption not disabled. 522 */ 523 int radix_tree_maybe_preload(gfp_t gfp_mask) 524 { 525 if (gfpflags_allow_blocking(gfp_mask)) 526 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 527 /* Preloading doesn't help anything with this gfp mask, skip it */ 528 preempt_disable(); 529 return 0; 530 } 531 EXPORT_SYMBOL(radix_tree_maybe_preload); 532 533 #ifdef CONFIG_RADIX_TREE_MULTIORDER 534 /* 535 * Preload with enough objects to ensure that we can split a single entry 536 * of order @old_order into many entries of size @new_order 537 */ 538 int radix_tree_split_preload(unsigned int old_order, unsigned int new_order, 539 gfp_t gfp_mask) 540 { 541 unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT); 542 unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) - 543 (new_order / RADIX_TREE_MAP_SHIFT); 544 unsigned nr = 0; 545 546 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); 547 BUG_ON(new_order >= old_order); 548 549 while (layers--) 550 nr = nr * RADIX_TREE_MAP_SIZE + 1; 551 return __radix_tree_preload(gfp_mask, top * nr); 552 } 553 #endif 554 555 /* 556 * The same as function above, but preload number of nodes required to insert 557 * (1 << order) continuous naturally-aligned elements. 558 */ 559 int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) 560 { 561 unsigned long nr_subtrees; 562 int nr_nodes, subtree_height; 563 564 /* Preloading doesn't help anything with this gfp mask, skip it */ 565 if (!gfpflags_allow_blocking(gfp_mask)) { 566 preempt_disable(); 567 return 0; 568 } 569 570 /* 571 * Calculate number and height of fully populated subtrees it takes to 572 * store (1 << order) elements. 573 */ 574 nr_subtrees = 1 << order; 575 for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE; 576 subtree_height++) 577 nr_subtrees >>= RADIX_TREE_MAP_SHIFT; 578 579 /* 580 * The worst case is zero height tree with a single item at index 0 and 581 * then inserting items starting at ULONG_MAX - (1 << order). 582 * 583 * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to 584 * 0-index item. 585 */ 586 nr_nodes = RADIX_TREE_MAX_PATH; 587 588 /* Plus branch to fully populated subtrees. */ 589 nr_nodes += RADIX_TREE_MAX_PATH - subtree_height; 590 591 /* Root node is shared. */ 592 nr_nodes--; 593 594 /* Plus nodes required to build subtrees. */ 595 nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height]; 596 597 return __radix_tree_preload(gfp_mask, nr_nodes); 598 } 599 600 static unsigned radix_tree_load_root(const struct radix_tree_root *root, 601 struct radix_tree_node **nodep, unsigned long *maxindex) 602 { 603 struct radix_tree_node *node = rcu_dereference_raw(root->rnode); 604 605 *nodep = node; 606 607 if (likely(radix_tree_is_internal_node(node))) { 608 node = entry_to_node(node); 609 *maxindex = node_maxindex(node); 610 return node->shift + RADIX_TREE_MAP_SHIFT; 611 } 612 613 *maxindex = 0; 614 return 0; 615 } 616 617 /* 618 * Extend a radix tree so it can store key @index. 619 */ 620 static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, 621 unsigned long index, unsigned int shift) 622 { 623 void *entry; 624 unsigned int maxshift; 625 int tag; 626 627 /* Figure out what the shift should be. */ 628 maxshift = shift; 629 while (index > shift_maxindex(maxshift)) 630 maxshift += RADIX_TREE_MAP_SHIFT; 631 632 entry = rcu_dereference_raw(root->rnode); 633 if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) 634 goto out; 635 636 do { 637 struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, 638 root, shift, 0, 1, 0); 639 if (!node) 640 return -ENOMEM; 641 642 if (is_idr(root)) { 643 all_tag_set(node, IDR_FREE); 644 if (!root_tag_get(root, IDR_FREE)) { 645 tag_clear(node, IDR_FREE, 0); 646 root_tag_set(root, IDR_FREE); 647 } 648 } else { 649 /* Propagate the aggregated tag info to the new child */ 650 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 651 if (root_tag_get(root, tag)) 652 tag_set(node, tag, 0); 653 } 654 } 655 656 BUG_ON(shift > BITS_PER_LONG); 657 if (radix_tree_is_internal_node(entry)) { 658 entry_to_node(entry)->parent = node; 659 } else if (radix_tree_exceptional_entry(entry)) { 660 /* Moving an exceptional root->rnode to a node */ 661 node->exceptional = 1; 662 } 663 /* 664 * entry was already in the radix tree, so we do not need 665 * rcu_assign_pointer here 666 */ 667 node->slots[0] = (void __rcu *)entry; 668 entry = node_to_entry(node); 669 rcu_assign_pointer(root->rnode, entry); 670 shift += RADIX_TREE_MAP_SHIFT; 671 } while (shift <= maxshift); 672 out: 673 return maxshift + RADIX_TREE_MAP_SHIFT; 674 } 675 676 /** 677 * radix_tree_shrink - shrink radix tree to minimum height 678 * @root radix tree root 679 */ 680 static inline bool radix_tree_shrink(struct radix_tree_root *root, 681 radix_tree_update_node_t update_node) 682 { 683 bool shrunk = false; 684 685 for (;;) { 686 struct radix_tree_node *node = rcu_dereference_raw(root->rnode); 687 struct radix_tree_node *child; 688 689 if (!radix_tree_is_internal_node(node)) 690 break; 691 node = entry_to_node(node); 692 693 /* 694 * The candidate node has more than one child, or its child 695 * is not at the leftmost slot, or the child is a multiorder 696 * entry, we cannot shrink. 697 */ 698 if (node->count != 1) 699 break; 700 child = rcu_dereference_raw(node->slots[0]); 701 if (!child) 702 break; 703 if (!radix_tree_is_internal_node(child) && node->shift) 704 break; 705 706 if (radix_tree_is_internal_node(child)) 707 entry_to_node(child)->parent = NULL; 708 709 /* 710 * We don't need rcu_assign_pointer(), since we are simply 711 * moving the node from one part of the tree to another: if it 712 * was safe to dereference the old pointer to it 713 * (node->slots[0]), it will be safe to dereference the new 714 * one (root->rnode) as far as dependent read barriers go. 715 */ 716 root->rnode = (void __rcu *)child; 717 if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) 718 root_tag_clear(root, IDR_FREE); 719 720 /* 721 * We have a dilemma here. The node's slot[0] must not be 722 * NULLed in case there are concurrent lookups expecting to 723 * find the item. However if this was a bottom-level node, 724 * then it may be subject to the slot pointer being visible 725 * to callers dereferencing it. If item corresponding to 726 * slot[0] is subsequently deleted, these callers would expect 727 * their slot to become empty sooner or later. 728 * 729 * For example, lockless pagecache will look up a slot, deref 730 * the page pointer, and if the page has 0 refcount it means it 731 * was concurrently deleted from pagecache so try the deref 732 * again. Fortunately there is already a requirement for logic 733 * to retry the entire slot lookup -- the indirect pointer 734 * problem (replacing direct root node with an indirect pointer 735 * also results in a stale slot). So tag the slot as indirect 736 * to force callers to retry. 737 */ 738 node->count = 0; 739 if (!radix_tree_is_internal_node(child)) { 740 node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; 741 if (update_node) 742 update_node(node); 743 } 744 745 WARN_ON_ONCE(!list_empty(&node->private_list)); 746 radix_tree_node_free(node); 747 shrunk = true; 748 } 749 750 return shrunk; 751 } 752 753 static bool delete_node(struct radix_tree_root *root, 754 struct radix_tree_node *node, 755 radix_tree_update_node_t update_node) 756 { 757 bool deleted = false; 758 759 do { 760 struct radix_tree_node *parent; 761 762 if (node->count) { 763 if (node_to_entry(node) == 764 rcu_dereference_raw(root->rnode)) 765 deleted |= radix_tree_shrink(root, 766 update_node); 767 return deleted; 768 } 769 770 parent = node->parent; 771 if (parent) { 772 parent->slots[node->offset] = NULL; 773 parent->count--; 774 } else { 775 /* 776 * Shouldn't the tags already have all been cleared 777 * by the caller? 778 */ 779 if (!is_idr(root)) 780 root_tag_clear_all(root); 781 root->rnode = NULL; 782 } 783 784 WARN_ON_ONCE(!list_empty(&node->private_list)); 785 radix_tree_node_free(node); 786 deleted = true; 787 788 node = parent; 789 } while (node); 790 791 return deleted; 792 } 793 794 /** 795 * __radix_tree_create - create a slot in a radix tree 796 * @root: radix tree root 797 * @index: index key 798 * @order: index occupies 2^order aligned slots 799 * @nodep: returns node 800 * @slotp: returns slot 801 * 802 * Create, if necessary, and return the node and slot for an item 803 * at position @index in the radix tree @root. 804 * 805 * Until there is more than one item in the tree, no nodes are 806 * allocated and @root->rnode is used as a direct slot instead of 807 * pointing to a node, in which case *@nodep will be NULL. 808 * 809 * Returns -ENOMEM, or 0 for success. 810 */ 811 int __radix_tree_create(struct radix_tree_root *root, unsigned long index, 812 unsigned order, struct radix_tree_node **nodep, 813 void __rcu ***slotp) 814 { 815 struct radix_tree_node *node = NULL, *child; 816 void __rcu **slot = (void __rcu **)&root->rnode; 817 unsigned long maxindex; 818 unsigned int shift, offset = 0; 819 unsigned long max = index | ((1UL << order) - 1); 820 gfp_t gfp = root_gfp_mask(root); 821 822 shift = radix_tree_load_root(root, &child, &maxindex); 823 824 /* Make sure the tree is high enough. */ 825 if (order > 0 && max == ((1UL << order) - 1)) 826 max++; 827 if (max > maxindex) { 828 int error = radix_tree_extend(root, gfp, max, shift); 829 if (error < 0) 830 return error; 831 shift = error; 832 child = rcu_dereference_raw(root->rnode); 833 } 834 835 while (shift > order) { 836 shift -= RADIX_TREE_MAP_SHIFT; 837 if (child == NULL) { 838 /* Have to add a child node. */ 839 child = radix_tree_node_alloc(gfp, node, root, shift, 840 offset, 0, 0); 841 if (!child) 842 return -ENOMEM; 843 rcu_assign_pointer(*slot, node_to_entry(child)); 844 if (node) 845 node->count++; 846 } else if (!radix_tree_is_internal_node(child)) 847 break; 848 849 /* Go a level down */ 850 node = entry_to_node(child); 851 offset = radix_tree_descend(node, &child, index); 852 slot = &node->slots[offset]; 853 } 854 855 if (nodep) 856 *nodep = node; 857 if (slotp) 858 *slotp = slot; 859 return 0; 860 } 861 862 /* 863 * Free any nodes below this node. The tree is presumed to not need 864 * shrinking, and any user data in the tree is presumed to not need a 865 * destructor called on it. If we need to add a destructor, we can 866 * add that functionality later. Note that we may not clear tags or 867 * slots from the tree as an RCU walker may still have a pointer into 868 * this subtree. We could replace the entries with RADIX_TREE_RETRY, 869 * but we'll still have to clear those in rcu_free. 870 */ 871 static void radix_tree_free_nodes(struct radix_tree_node *node) 872 { 873 unsigned offset = 0; 874 struct radix_tree_node *child = entry_to_node(node); 875 876 for (;;) { 877 void *entry = rcu_dereference_raw(child->slots[offset]); 878 if (radix_tree_is_internal_node(entry) && 879 !is_sibling_entry(child, entry)) { 880 child = entry_to_node(entry); 881 offset = 0; 882 continue; 883 } 884 offset++; 885 while (offset == RADIX_TREE_MAP_SIZE) { 886 struct radix_tree_node *old = child; 887 offset = child->offset + 1; 888 child = child->parent; 889 WARN_ON_ONCE(!list_empty(&old->private_list)); 890 radix_tree_node_free(old); 891 if (old == entry_to_node(node)) 892 return; 893 } 894 } 895 } 896 897 #ifdef CONFIG_RADIX_TREE_MULTIORDER 898 static inline int insert_entries(struct radix_tree_node *node, 899 void __rcu **slot, void *item, unsigned order, bool replace) 900 { 901 struct radix_tree_node *child; 902 unsigned i, n, tag, offset, tags = 0; 903 904 if (node) { 905 if (order > node->shift) 906 n = 1 << (order - node->shift); 907 else 908 n = 1; 909 offset = get_slot_offset(node, slot); 910 } else { 911 n = 1; 912 offset = 0; 913 } 914 915 if (n > 1) { 916 offset = offset & ~(n - 1); 917 slot = &node->slots[offset]; 918 } 919 child = node_to_entry(slot); 920 921 for (i = 0; i < n; i++) { 922 if (slot[i]) { 923 if (replace) { 924 node->count--; 925 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 926 if (tag_get(node, tag, offset + i)) 927 tags |= 1 << tag; 928 } else 929 return -EEXIST; 930 } 931 } 932 933 for (i = 0; i < n; i++) { 934 struct radix_tree_node *old = rcu_dereference_raw(slot[i]); 935 if (i) { 936 rcu_assign_pointer(slot[i], child); 937 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 938 if (tags & (1 << tag)) 939 tag_clear(node, tag, offset + i); 940 } else { 941 rcu_assign_pointer(slot[i], item); 942 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 943 if (tags & (1 << tag)) 944 tag_set(node, tag, offset); 945 } 946 if (radix_tree_is_internal_node(old) && 947 !is_sibling_entry(node, old) && 948 (old != RADIX_TREE_RETRY)) 949 radix_tree_free_nodes(old); 950 if (radix_tree_exceptional_entry(old)) 951 node->exceptional--; 952 } 953 if (node) { 954 node->count += n; 955 if (radix_tree_exceptional_entry(item)) 956 node->exceptional += n; 957 } 958 return n; 959 } 960 #else 961 static inline int insert_entries(struct radix_tree_node *node, 962 void __rcu **slot, void *item, unsigned order, bool replace) 963 { 964 if (*slot) 965 return -EEXIST; 966 rcu_assign_pointer(*slot, item); 967 if (node) { 968 node->count++; 969 if (radix_tree_exceptional_entry(item)) 970 node->exceptional++; 971 } 972 return 1; 973 } 974 #endif 975 976 /** 977 * __radix_tree_insert - insert into a radix tree 978 * @root: radix tree root 979 * @index: index key 980 * @order: key covers the 2^order indices around index 981 * @item: item to insert 982 * 983 * Insert an item into the radix tree at position @index. 984 */ 985 int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, 986 unsigned order, void *item) 987 { 988 struct radix_tree_node *node; 989 void __rcu **slot; 990 int error; 991 992 BUG_ON(radix_tree_is_internal_node(item)); 993 994 error = __radix_tree_create(root, index, order, &node, &slot); 995 if (error) 996 return error; 997 998 error = insert_entries(node, slot, item, order, false); 999 if (error < 0) 1000 return error; 1001 1002 if (node) { 1003 unsigned offset = get_slot_offset(node, slot); 1004 BUG_ON(tag_get(node, 0, offset)); 1005 BUG_ON(tag_get(node, 1, offset)); 1006 BUG_ON(tag_get(node, 2, offset)); 1007 } else { 1008 BUG_ON(root_tags_get(root)); 1009 } 1010 1011 return 0; 1012 } 1013 EXPORT_SYMBOL(__radix_tree_insert); 1014 1015 /** 1016 * __radix_tree_lookup - lookup an item in a radix tree 1017 * @root: radix tree root 1018 * @index: index key 1019 * @nodep: returns node 1020 * @slotp: returns slot 1021 * 1022 * Lookup and return the item at position @index in the radix 1023 * tree @root. 1024 * 1025 * Until there is more than one item in the tree, no nodes are 1026 * allocated and @root->rnode is used as a direct slot instead of 1027 * pointing to a node, in which case *@nodep will be NULL. 1028 */ 1029 void *__radix_tree_lookup(const struct radix_tree_root *root, 1030 unsigned long index, struct radix_tree_node **nodep, 1031 void __rcu ***slotp) 1032 { 1033 struct radix_tree_node *node, *parent; 1034 unsigned long maxindex; 1035 void __rcu **slot; 1036 1037 restart: 1038 parent = NULL; 1039 slot = (void __rcu **)&root->rnode; 1040 radix_tree_load_root(root, &node, &maxindex); 1041 if (index > maxindex) 1042 return NULL; 1043 1044 while (radix_tree_is_internal_node(node)) { 1045 unsigned offset; 1046 1047 if (node == RADIX_TREE_RETRY) 1048 goto restart; 1049 parent = entry_to_node(node); 1050 offset = radix_tree_descend(parent, &node, index); 1051 slot = parent->slots + offset; 1052 } 1053 1054 if (nodep) 1055 *nodep = parent; 1056 if (slotp) 1057 *slotp = slot; 1058 return node; 1059 } 1060 1061 /** 1062 * radix_tree_lookup_slot - lookup a slot in a radix tree 1063 * @root: radix tree root 1064 * @index: index key 1065 * 1066 * Returns: the slot corresponding to the position @index in the 1067 * radix tree @root. This is useful for update-if-exists operations. 1068 * 1069 * This function can be called under rcu_read_lock iff the slot is not 1070 * modified by radix_tree_replace_slot, otherwise it must be called 1071 * exclusive from other writers. Any dereference of the slot must be done 1072 * using radix_tree_deref_slot. 1073 */ 1074 void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, 1075 unsigned long index) 1076 { 1077 void __rcu **slot; 1078 1079 if (!__radix_tree_lookup(root, index, NULL, &slot)) 1080 return NULL; 1081 return slot; 1082 } 1083 EXPORT_SYMBOL(radix_tree_lookup_slot); 1084 1085 /** 1086 * radix_tree_lookup - perform lookup operation on a radix tree 1087 * @root: radix tree root 1088 * @index: index key 1089 * 1090 * Lookup the item at the position @index in the radix tree @root. 1091 * 1092 * This function can be called under rcu_read_lock, however the caller 1093 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 1094 * them safely). No RCU barriers are required to access or modify the 1095 * returned item, however. 1096 */ 1097 void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) 1098 { 1099 return __radix_tree_lookup(root, index, NULL, NULL); 1100 } 1101 EXPORT_SYMBOL(radix_tree_lookup); 1102 1103 static inline void replace_sibling_entries(struct radix_tree_node *node, 1104 void __rcu **slot, int count, int exceptional) 1105 { 1106 #ifdef CONFIG_RADIX_TREE_MULTIORDER 1107 void *ptr = node_to_entry(slot); 1108 unsigned offset = get_slot_offset(node, slot) + 1; 1109 1110 while (offset < RADIX_TREE_MAP_SIZE) { 1111 if (rcu_dereference_raw(node->slots[offset]) != ptr) 1112 break; 1113 if (count < 0) { 1114 node->slots[offset] = NULL; 1115 node->count--; 1116 } 1117 node->exceptional += exceptional; 1118 offset++; 1119 } 1120 #endif 1121 } 1122 1123 static void replace_slot(void __rcu **slot, void *item, 1124 struct radix_tree_node *node, int count, int exceptional) 1125 { 1126 if (WARN_ON_ONCE(radix_tree_is_internal_node(item))) 1127 return; 1128 1129 if (node && (count || exceptional)) { 1130 node->count += count; 1131 node->exceptional += exceptional; 1132 replace_sibling_entries(node, slot, count, exceptional); 1133 } 1134 1135 rcu_assign_pointer(*slot, item); 1136 } 1137 1138 static bool node_tag_get(const struct radix_tree_root *root, 1139 const struct radix_tree_node *node, 1140 unsigned int tag, unsigned int offset) 1141 { 1142 if (node) 1143 return tag_get(node, tag, offset); 1144 return root_tag_get(root, tag); 1145 } 1146 1147 /* 1148 * IDR users want to be able to store NULL in the tree, so if the slot isn't 1149 * free, don't adjust the count, even if it's transitioning between NULL and 1150 * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still 1151 * have empty bits, but it only stores NULL in slots when they're being 1152 * deleted. 1153 */ 1154 static int calculate_count(struct radix_tree_root *root, 1155 struct radix_tree_node *node, void __rcu **slot, 1156 void *item, void *old) 1157 { 1158 if (is_idr(root)) { 1159 unsigned offset = get_slot_offset(node, slot); 1160 bool free = node_tag_get(root, node, IDR_FREE, offset); 1161 if (!free) 1162 return 0; 1163 if (!old) 1164 return 1; 1165 } 1166 return !!item - !!old; 1167 } 1168 1169 /** 1170 * __radix_tree_replace - replace item in a slot 1171 * @root: radix tree root 1172 * @node: pointer to tree node 1173 * @slot: pointer to slot in @node 1174 * @item: new item to store in the slot. 1175 * @update_node: callback for changing leaf nodes 1176 * 1177 * For use with __radix_tree_lookup(). Caller must hold tree write locked 1178 * across slot lookup and replacement. 1179 */ 1180 void __radix_tree_replace(struct radix_tree_root *root, 1181 struct radix_tree_node *node, 1182 void __rcu **slot, void *item, 1183 radix_tree_update_node_t update_node) 1184 { 1185 void *old = rcu_dereference_raw(*slot); 1186 int exceptional = !!radix_tree_exceptional_entry(item) - 1187 !!radix_tree_exceptional_entry(old); 1188 int count = calculate_count(root, node, slot, item, old); 1189 1190 /* 1191 * This function supports replacing exceptional entries and 1192 * deleting entries, but that needs accounting against the 1193 * node unless the slot is root->rnode. 1194 */ 1195 WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) && 1196 (count || exceptional)); 1197 replace_slot(slot, item, node, count, exceptional); 1198 1199 if (!node) 1200 return; 1201 1202 if (update_node) 1203 update_node(node); 1204 1205 delete_node(root, node, update_node); 1206 } 1207 1208 /** 1209 * radix_tree_replace_slot - replace item in a slot 1210 * @root: radix tree root 1211 * @slot: pointer to slot 1212 * @item: new item to store in the slot. 1213 * 1214 * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(), 1215 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked 1216 * across slot lookup and replacement. 1217 * 1218 * NOTE: This cannot be used to switch between non-entries (empty slots), 1219 * regular entries, and exceptional entries, as that requires accounting 1220 * inside the radix tree node. When switching from one type of entry or 1221 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or 1222 * radix_tree_iter_replace(). 1223 */ 1224 void radix_tree_replace_slot(struct radix_tree_root *root, 1225 void __rcu **slot, void *item) 1226 { 1227 __radix_tree_replace(root, NULL, slot, item, NULL); 1228 } 1229 EXPORT_SYMBOL(radix_tree_replace_slot); 1230 1231 /** 1232 * radix_tree_iter_replace - replace item in a slot 1233 * @root: radix tree root 1234 * @slot: pointer to slot 1235 * @item: new item to store in the slot. 1236 * 1237 * For use with radix_tree_split() and radix_tree_for_each_slot(). 1238 * Caller must hold tree write locked across split and replacement. 1239 */ 1240 void radix_tree_iter_replace(struct radix_tree_root *root, 1241 const struct radix_tree_iter *iter, 1242 void __rcu **slot, void *item) 1243 { 1244 __radix_tree_replace(root, iter->node, slot, item, NULL); 1245 } 1246 1247 #ifdef CONFIG_RADIX_TREE_MULTIORDER 1248 /** 1249 * radix_tree_join - replace multiple entries with one multiorder entry 1250 * @root: radix tree root 1251 * @index: an index inside the new entry 1252 * @order: order of the new entry 1253 * @item: new entry 1254 * 1255 * Call this function to replace several entries with one larger entry. 1256 * The existing entries are presumed to not need freeing as a result of 1257 * this call. 1258 * 1259 * The replacement entry will have all the tags set on it that were set 1260 * on any of the entries it is replacing. 1261 */ 1262 int radix_tree_join(struct radix_tree_root *root, unsigned long index, 1263 unsigned order, void *item) 1264 { 1265 struct radix_tree_node *node; 1266 void __rcu **slot; 1267 int error; 1268 1269 BUG_ON(radix_tree_is_internal_node(item)); 1270 1271 error = __radix_tree_create(root, index, order, &node, &slot); 1272 if (!error) 1273 error = insert_entries(node, slot, item, order, true); 1274 if (error > 0) 1275 error = 0; 1276 1277 return error; 1278 } 1279 1280 /** 1281 * radix_tree_split - Split an entry into smaller entries 1282 * @root: radix tree root 1283 * @index: An index within the large entry 1284 * @order: Order of new entries 1285 * 1286 * Call this function as the first step in replacing a multiorder entry 1287 * with several entries of lower order. After this function returns, 1288 * loop over the relevant portion of the tree using radix_tree_for_each_slot() 1289 * and call radix_tree_iter_replace() to set up each new entry. 1290 * 1291 * The tags from this entry are replicated to all the new entries. 1292 * 1293 * The radix tree should be locked against modification during the entire 1294 * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which 1295 * should prompt RCU walkers to restart the lookup from the root. 1296 */ 1297 int radix_tree_split(struct radix_tree_root *root, unsigned long index, 1298 unsigned order) 1299 { 1300 struct radix_tree_node *parent, *node, *child; 1301 void __rcu **slot; 1302 unsigned int offset, end; 1303 unsigned n, tag, tags = 0; 1304 gfp_t gfp = root_gfp_mask(root); 1305 1306 if (!__radix_tree_lookup(root, index, &parent, &slot)) 1307 return -ENOENT; 1308 if (!parent) 1309 return -ENOENT; 1310 1311 offset = get_slot_offset(parent, slot); 1312 1313 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1314 if (tag_get(parent, tag, offset)) 1315 tags |= 1 << tag; 1316 1317 for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { 1318 if (!is_sibling_entry(parent, 1319 rcu_dereference_raw(parent->slots[end]))) 1320 break; 1321 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1322 if (tags & (1 << tag)) 1323 tag_set(parent, tag, end); 1324 /* rcu_assign_pointer ensures tags are set before RETRY */ 1325 rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); 1326 } 1327 rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); 1328 parent->exceptional -= (end - offset); 1329 1330 if (order == parent->shift) 1331 return 0; 1332 if (order > parent->shift) { 1333 while (offset < end) 1334 offset += insert_entries(parent, &parent->slots[offset], 1335 RADIX_TREE_RETRY, order, true); 1336 return 0; 1337 } 1338 1339 node = parent; 1340 1341 for (;;) { 1342 if (node->shift > order) { 1343 child = radix_tree_node_alloc(gfp, node, root, 1344 node->shift - RADIX_TREE_MAP_SHIFT, 1345 offset, 0, 0); 1346 if (!child) 1347 goto nomem; 1348 if (node != parent) { 1349 node->count++; 1350 rcu_assign_pointer(node->slots[offset], 1351 node_to_entry(child)); 1352 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1353 if (tags & (1 << tag)) 1354 tag_set(node, tag, offset); 1355 } 1356 1357 node = child; 1358 offset = 0; 1359 continue; 1360 } 1361 1362 n = insert_entries(node, &node->slots[offset], 1363 RADIX_TREE_RETRY, order, false); 1364 BUG_ON(n > RADIX_TREE_MAP_SIZE); 1365 1366 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1367 if (tags & (1 << tag)) 1368 tag_set(node, tag, offset); 1369 offset += n; 1370 1371 while (offset == RADIX_TREE_MAP_SIZE) { 1372 if (node == parent) 1373 break; 1374 offset = node->offset; 1375 child = node; 1376 node = node->parent; 1377 rcu_assign_pointer(node->slots[offset], 1378 node_to_entry(child)); 1379 offset++; 1380 } 1381 if ((node == parent) && (offset == end)) 1382 return 0; 1383 } 1384 1385 nomem: 1386 /* Shouldn't happen; did user forget to preload? */ 1387 /* TODO: free all the allocated nodes */ 1388 WARN_ON(1); 1389 return -ENOMEM; 1390 } 1391 #endif 1392 1393 static void node_tag_set(struct radix_tree_root *root, 1394 struct radix_tree_node *node, 1395 unsigned int tag, unsigned int offset) 1396 { 1397 while (node) { 1398 if (tag_get(node, tag, offset)) 1399 return; 1400 tag_set(node, tag, offset); 1401 offset = node->offset; 1402 node = node->parent; 1403 } 1404 1405 if (!root_tag_get(root, tag)) 1406 root_tag_set(root, tag); 1407 } 1408 1409 /** 1410 * radix_tree_tag_set - set a tag on a radix tree node 1411 * @root: radix tree root 1412 * @index: index key 1413 * @tag: tag index 1414 * 1415 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 1416 * corresponding to @index in the radix tree. From 1417 * the root all the way down to the leaf node. 1418 * 1419 * Returns the address of the tagged item. Setting a tag on a not-present 1420 * item is a bug. 1421 */ 1422 void *radix_tree_tag_set(struct radix_tree_root *root, 1423 unsigned long index, unsigned int tag) 1424 { 1425 struct radix_tree_node *node, *parent; 1426 unsigned long maxindex; 1427 1428 radix_tree_load_root(root, &node, &maxindex); 1429 BUG_ON(index > maxindex); 1430 1431 while (radix_tree_is_internal_node(node)) { 1432 unsigned offset; 1433 1434 parent = entry_to_node(node); 1435 offset = radix_tree_descend(parent, &node, index); 1436 BUG_ON(!node); 1437 1438 if (!tag_get(parent, tag, offset)) 1439 tag_set(parent, tag, offset); 1440 } 1441 1442 /* set the root's tag bit */ 1443 if (!root_tag_get(root, tag)) 1444 root_tag_set(root, tag); 1445 1446 return node; 1447 } 1448 EXPORT_SYMBOL(radix_tree_tag_set); 1449 1450 /** 1451 * radix_tree_iter_tag_set - set a tag on the current iterator entry 1452 * @root: radix tree root 1453 * @iter: iterator state 1454 * @tag: tag to set 1455 */ 1456 void radix_tree_iter_tag_set(struct radix_tree_root *root, 1457 const struct radix_tree_iter *iter, unsigned int tag) 1458 { 1459 node_tag_set(root, iter->node, tag, iter_offset(iter)); 1460 } 1461 1462 static void node_tag_clear(struct radix_tree_root *root, 1463 struct radix_tree_node *node, 1464 unsigned int tag, unsigned int offset) 1465 { 1466 while (node) { 1467 if (!tag_get(node, tag, offset)) 1468 return; 1469 tag_clear(node, tag, offset); 1470 if (any_tag_set(node, tag)) 1471 return; 1472 1473 offset = node->offset; 1474 node = node->parent; 1475 } 1476 1477 /* clear the root's tag bit */ 1478 if (root_tag_get(root, tag)) 1479 root_tag_clear(root, tag); 1480 } 1481 1482 /** 1483 * radix_tree_tag_clear - clear a tag on a radix tree node 1484 * @root: radix tree root 1485 * @index: index key 1486 * @tag: tag index 1487 * 1488 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 1489 * corresponding to @index in the radix tree. If this causes 1490 * the leaf node to have no tags set then clear the tag in the 1491 * next-to-leaf node, etc. 1492 * 1493 * Returns the address of the tagged item on success, else NULL. ie: 1494 * has the same return value and semantics as radix_tree_lookup(). 1495 */ 1496 void *radix_tree_tag_clear(struct radix_tree_root *root, 1497 unsigned long index, unsigned int tag) 1498 { 1499 struct radix_tree_node *node, *parent; 1500 unsigned long maxindex; 1501 int uninitialized_var(offset); 1502 1503 radix_tree_load_root(root, &node, &maxindex); 1504 if (index > maxindex) 1505 return NULL; 1506 1507 parent = NULL; 1508 1509 while (radix_tree_is_internal_node(node)) { 1510 parent = entry_to_node(node); 1511 offset = radix_tree_descend(parent, &node, index); 1512 } 1513 1514 if (node) 1515 node_tag_clear(root, parent, tag, offset); 1516 1517 return node; 1518 } 1519 EXPORT_SYMBOL(radix_tree_tag_clear); 1520 1521 /** 1522 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry 1523 * @root: radix tree root 1524 * @iter: iterator state 1525 * @tag: tag to clear 1526 */ 1527 void radix_tree_iter_tag_clear(struct radix_tree_root *root, 1528 const struct radix_tree_iter *iter, unsigned int tag) 1529 { 1530 node_tag_clear(root, iter->node, tag, iter_offset(iter)); 1531 } 1532 1533 /** 1534 * radix_tree_tag_get - get a tag on a radix tree node 1535 * @root: radix tree root 1536 * @index: index key 1537 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 1538 * 1539 * Return values: 1540 * 1541 * 0: tag not present or not set 1542 * 1: tag set 1543 * 1544 * Note that the return value of this function may not be relied on, even if 1545 * the RCU lock is held, unless tag modification and node deletion are excluded 1546 * from concurrency. 1547 */ 1548 int radix_tree_tag_get(const struct radix_tree_root *root, 1549 unsigned long index, unsigned int tag) 1550 { 1551 struct radix_tree_node *node, *parent; 1552 unsigned long maxindex; 1553 1554 if (!root_tag_get(root, tag)) 1555 return 0; 1556 1557 radix_tree_load_root(root, &node, &maxindex); 1558 if (index > maxindex) 1559 return 0; 1560 1561 while (radix_tree_is_internal_node(node)) { 1562 unsigned offset; 1563 1564 parent = entry_to_node(node); 1565 offset = radix_tree_descend(parent, &node, index); 1566 1567 if (!tag_get(parent, tag, offset)) 1568 return 0; 1569 if (node == RADIX_TREE_RETRY) 1570 break; 1571 } 1572 1573 return 1; 1574 } 1575 EXPORT_SYMBOL(radix_tree_tag_get); 1576 1577 static inline void __set_iter_shift(struct radix_tree_iter *iter, 1578 unsigned int shift) 1579 { 1580 #ifdef CONFIG_RADIX_TREE_MULTIORDER 1581 iter->shift = shift; 1582 #endif 1583 } 1584 1585 /* Construct iter->tags bit-mask from node->tags[tag] array */ 1586 static void set_iter_tags(struct radix_tree_iter *iter, 1587 struct radix_tree_node *node, unsigned offset, 1588 unsigned tag) 1589 { 1590 unsigned tag_long = offset / BITS_PER_LONG; 1591 unsigned tag_bit = offset % BITS_PER_LONG; 1592 1593 if (!node) { 1594 iter->tags = 1; 1595 return; 1596 } 1597 1598 iter->tags = node->tags[tag][tag_long] >> tag_bit; 1599 1600 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ 1601 if (tag_long < RADIX_TREE_TAG_LONGS - 1) { 1602 /* Pick tags from next element */ 1603 if (tag_bit) 1604 iter->tags |= node->tags[tag][tag_long + 1] << 1605 (BITS_PER_LONG - tag_bit); 1606 /* Clip chunk size, here only BITS_PER_LONG tags */ 1607 iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); 1608 } 1609 } 1610 1611 #ifdef CONFIG_RADIX_TREE_MULTIORDER 1612 static void __rcu **skip_siblings(struct radix_tree_node **nodep, 1613 void __rcu **slot, struct radix_tree_iter *iter) 1614 { 1615 while (iter->index < iter->next_index) { 1616 *nodep = rcu_dereference_raw(*slot); 1617 if (*nodep && !is_sibling_entry(iter->node, *nodep)) 1618 return slot; 1619 slot++; 1620 iter->index = __radix_tree_iter_add(iter, 1); 1621 iter->tags >>= 1; 1622 } 1623 1624 *nodep = NULL; 1625 return NULL; 1626 } 1627 1628 void __rcu **__radix_tree_next_slot(void __rcu **slot, 1629 struct radix_tree_iter *iter, unsigned flags) 1630 { 1631 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; 1632 struct radix_tree_node *node; 1633 1634 slot = skip_siblings(&node, slot, iter); 1635 1636 while (radix_tree_is_internal_node(node)) { 1637 unsigned offset; 1638 unsigned long next_index; 1639 1640 if (node == RADIX_TREE_RETRY) 1641 return slot; 1642 node = entry_to_node(node); 1643 iter->node = node; 1644 iter->shift = node->shift; 1645 1646 if (flags & RADIX_TREE_ITER_TAGGED) { 1647 offset = radix_tree_find_next_bit(node, tag, 0); 1648 if (offset == RADIX_TREE_MAP_SIZE) 1649 return NULL; 1650 slot = &node->slots[offset]; 1651 iter->index = __radix_tree_iter_add(iter, offset); 1652 set_iter_tags(iter, node, offset, tag); 1653 node = rcu_dereference_raw(*slot); 1654 } else { 1655 offset = 0; 1656 slot = &node->slots[0]; 1657 for (;;) { 1658 node = rcu_dereference_raw(*slot); 1659 if (node) 1660 break; 1661 slot++; 1662 offset++; 1663 if (offset == RADIX_TREE_MAP_SIZE) 1664 return NULL; 1665 } 1666 iter->index = __radix_tree_iter_add(iter, offset); 1667 } 1668 if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0)) 1669 goto none; 1670 next_index = (iter->index | shift_maxindex(iter->shift)) + 1; 1671 if (next_index < iter->next_index) 1672 iter->next_index = next_index; 1673 } 1674 1675 return slot; 1676 none: 1677 iter->next_index = 0; 1678 return NULL; 1679 } 1680 EXPORT_SYMBOL(__radix_tree_next_slot); 1681 #else 1682 static void __rcu **skip_siblings(struct radix_tree_node **nodep, 1683 void __rcu **slot, struct radix_tree_iter *iter) 1684 { 1685 return slot; 1686 } 1687 #endif 1688 1689 void __rcu **radix_tree_iter_resume(void __rcu **slot, 1690 struct radix_tree_iter *iter) 1691 { 1692 struct radix_tree_node *node; 1693 1694 slot++; 1695 iter->index = __radix_tree_iter_add(iter, 1); 1696 skip_siblings(&node, slot, iter); 1697 iter->next_index = iter->index; 1698 iter->tags = 0; 1699 return NULL; 1700 } 1701 EXPORT_SYMBOL(radix_tree_iter_resume); 1702 1703 /** 1704 * radix_tree_next_chunk - find next chunk of slots for iteration 1705 * 1706 * @root: radix tree root 1707 * @iter: iterator state 1708 * @flags: RADIX_TREE_ITER_* flags and tag index 1709 * Returns: pointer to chunk first slot, or NULL if iteration is over 1710 */ 1711 void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, 1712 struct radix_tree_iter *iter, unsigned flags) 1713 { 1714 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; 1715 struct radix_tree_node *node, *child; 1716 unsigned long index, offset, maxindex; 1717 1718 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) 1719 return NULL; 1720 1721 /* 1722 * Catch next_index overflow after ~0UL. iter->index never overflows 1723 * during iterating; it can be zero only at the beginning. 1724 * And we cannot overflow iter->next_index in a single step, 1725 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 1726 * 1727 * This condition also used by radix_tree_next_slot() to stop 1728 * contiguous iterating, and forbid switching to the next chunk. 1729 */ 1730 index = iter->next_index; 1731 if (!index && iter->index) 1732 return NULL; 1733 1734 restart: 1735 radix_tree_load_root(root, &child, &maxindex); 1736 if (index > maxindex) 1737 return NULL; 1738 if (!child) 1739 return NULL; 1740 1741 if (!radix_tree_is_internal_node(child)) { 1742 /* Single-slot tree */ 1743 iter->index = index; 1744 iter->next_index = maxindex + 1; 1745 iter->tags = 1; 1746 iter->node = NULL; 1747 __set_iter_shift(iter, 0); 1748 return (void __rcu **)&root->rnode; 1749 } 1750 1751 do { 1752 node = entry_to_node(child); 1753 offset = radix_tree_descend(node, &child, index); 1754 1755 if ((flags & RADIX_TREE_ITER_TAGGED) ? 1756 !tag_get(node, tag, offset) : !child) { 1757 /* Hole detected */ 1758 if (flags & RADIX_TREE_ITER_CONTIG) 1759 return NULL; 1760 1761 if (flags & RADIX_TREE_ITER_TAGGED) 1762 offset = radix_tree_find_next_bit(node, tag, 1763 offset + 1); 1764 else 1765 while (++offset < RADIX_TREE_MAP_SIZE) { 1766 void *slot = rcu_dereference_raw( 1767 node->slots[offset]); 1768 if (is_sibling_entry(node, slot)) 1769 continue; 1770 if (slot) 1771 break; 1772 } 1773 index &= ~node_maxindex(node); 1774 index += offset << node->shift; 1775 /* Overflow after ~0UL */ 1776 if (!index) 1777 return NULL; 1778 if (offset == RADIX_TREE_MAP_SIZE) 1779 goto restart; 1780 child = rcu_dereference_raw(node->slots[offset]); 1781 } 1782 1783 if (!child) 1784 goto restart; 1785 if (child == RADIX_TREE_RETRY) 1786 break; 1787 } while (radix_tree_is_internal_node(child)); 1788 1789 /* Update the iterator state */ 1790 iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); 1791 iter->next_index = (index | node_maxindex(node)) + 1; 1792 iter->node = node; 1793 __set_iter_shift(iter, node->shift); 1794 1795 if (flags & RADIX_TREE_ITER_TAGGED) 1796 set_iter_tags(iter, node, offset, tag); 1797 1798 return node->slots + offset; 1799 } 1800 EXPORT_SYMBOL(radix_tree_next_chunk); 1801 1802 /** 1803 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 1804 * @root: radix tree root 1805 * @results: where the results of the lookup are placed 1806 * @first_index: start the lookup from this key 1807 * @max_items: place up to this many items at *results 1808 * 1809 * Performs an index-ascending scan of the tree for present items. Places 1810 * them at *@results and returns the number of items which were placed at 1811 * *@results. 1812 * 1813 * The implementation is naive. 1814 * 1815 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 1816 * rcu_read_lock. In this case, rather than the returned results being 1817 * an atomic snapshot of the tree at a single point in time, the 1818 * semantics of an RCU protected gang lookup are as though multiple 1819 * radix_tree_lookups have been issued in individual locks, and results 1820 * stored in 'results'. 1821 */ 1822 unsigned int 1823 radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, 1824 unsigned long first_index, unsigned int max_items) 1825 { 1826 struct radix_tree_iter iter; 1827 void __rcu **slot; 1828 unsigned int ret = 0; 1829 1830 if (unlikely(!max_items)) 1831 return 0; 1832 1833 radix_tree_for_each_slot(slot, root, &iter, first_index) { 1834 results[ret] = rcu_dereference_raw(*slot); 1835 if (!results[ret]) 1836 continue; 1837 if (radix_tree_is_internal_node(results[ret])) { 1838 slot = radix_tree_iter_retry(&iter); 1839 continue; 1840 } 1841 if (++ret == max_items) 1842 break; 1843 } 1844 1845 return ret; 1846 } 1847 EXPORT_SYMBOL(radix_tree_gang_lookup); 1848 1849 /** 1850 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree 1851 * @root: radix tree root 1852 * @results: where the results of the lookup are placed 1853 * @indices: where their indices should be placed (but usually NULL) 1854 * @first_index: start the lookup from this key 1855 * @max_items: place up to this many items at *results 1856 * 1857 * Performs an index-ascending scan of the tree for present items. Places 1858 * their slots at *@results and returns the number of items which were 1859 * placed at *@results. 1860 * 1861 * The implementation is naive. 1862 * 1863 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must 1864 * be dereferenced with radix_tree_deref_slot, and if using only RCU 1865 * protection, radix_tree_deref_slot may fail requiring a retry. 1866 */ 1867 unsigned int 1868 radix_tree_gang_lookup_slot(const struct radix_tree_root *root, 1869 void __rcu ***results, unsigned long *indices, 1870 unsigned long first_index, unsigned int max_items) 1871 { 1872 struct radix_tree_iter iter; 1873 void __rcu **slot; 1874 unsigned int ret = 0; 1875 1876 if (unlikely(!max_items)) 1877 return 0; 1878 1879 radix_tree_for_each_slot(slot, root, &iter, first_index) { 1880 results[ret] = slot; 1881 if (indices) 1882 indices[ret] = iter.index; 1883 if (++ret == max_items) 1884 break; 1885 } 1886 1887 return ret; 1888 } 1889 EXPORT_SYMBOL(radix_tree_gang_lookup_slot); 1890 1891 /** 1892 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 1893 * based on a tag 1894 * @root: radix tree root 1895 * @results: where the results of the lookup are placed 1896 * @first_index: start the lookup from this key 1897 * @max_items: place up to this many items at *results 1898 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1899 * 1900 * Performs an index-ascending scan of the tree for present items which 1901 * have the tag indexed by @tag set. Places the items at *@results and 1902 * returns the number of items which were placed at *@results. 1903 */ 1904 unsigned int 1905 radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, 1906 unsigned long first_index, unsigned int max_items, 1907 unsigned int tag) 1908 { 1909 struct radix_tree_iter iter; 1910 void __rcu **slot; 1911 unsigned int ret = 0; 1912 1913 if (unlikely(!max_items)) 1914 return 0; 1915 1916 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1917 results[ret] = rcu_dereference_raw(*slot); 1918 if (!results[ret]) 1919 continue; 1920 if (radix_tree_is_internal_node(results[ret])) { 1921 slot = radix_tree_iter_retry(&iter); 1922 continue; 1923 } 1924 if (++ret == max_items) 1925 break; 1926 } 1927 1928 return ret; 1929 } 1930 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 1931 1932 /** 1933 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 1934 * radix tree based on a tag 1935 * @root: radix tree root 1936 * @results: where the results of the lookup are placed 1937 * @first_index: start the lookup from this key 1938 * @max_items: place up to this many items at *results 1939 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1940 * 1941 * Performs an index-ascending scan of the tree for present items which 1942 * have the tag indexed by @tag set. Places the slots at *@results and 1943 * returns the number of slots which were placed at *@results. 1944 */ 1945 unsigned int 1946 radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, 1947 void __rcu ***results, unsigned long first_index, 1948 unsigned int max_items, unsigned int tag) 1949 { 1950 struct radix_tree_iter iter; 1951 void __rcu **slot; 1952 unsigned int ret = 0; 1953 1954 if (unlikely(!max_items)) 1955 return 0; 1956 1957 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1958 results[ret] = slot; 1959 if (++ret == max_items) 1960 break; 1961 } 1962 1963 return ret; 1964 } 1965 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1966 1967 /** 1968 * __radix_tree_delete_node - try to free node after clearing a slot 1969 * @root: radix tree root 1970 * @node: node containing @index 1971 * @update_node: callback for changing leaf nodes 1972 * 1973 * After clearing the slot at @index in @node from radix tree 1974 * rooted at @root, call this function to attempt freeing the 1975 * node and shrinking the tree. 1976 */ 1977 void __radix_tree_delete_node(struct radix_tree_root *root, 1978 struct radix_tree_node *node, 1979 radix_tree_update_node_t update_node) 1980 { 1981 delete_node(root, node, update_node); 1982 } 1983 1984 static bool __radix_tree_delete(struct radix_tree_root *root, 1985 struct radix_tree_node *node, void __rcu **slot) 1986 { 1987 void *old = rcu_dereference_raw(*slot); 1988 int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0; 1989 unsigned offset = get_slot_offset(node, slot); 1990 int tag; 1991 1992 if (is_idr(root)) 1993 node_tag_set(root, node, IDR_FREE, offset); 1994 else 1995 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1996 node_tag_clear(root, node, tag, offset); 1997 1998 replace_slot(slot, NULL, node, -1, exceptional); 1999 return node && delete_node(root, node, NULL); 2000 } 2001 2002 /** 2003 * radix_tree_iter_delete - delete the entry at this iterator position 2004 * @root: radix tree root 2005 * @iter: iterator state 2006 * @slot: pointer to slot 2007 * 2008 * Delete the entry at the position currently pointed to by the iterator. 2009 * This may result in the current node being freed; if it is, the iterator 2010 * is advanced so that it will not reference the freed memory. This 2011 * function may be called without any locking if there are no other threads 2012 * which can access this tree. 2013 */ 2014 void radix_tree_iter_delete(struct radix_tree_root *root, 2015 struct radix_tree_iter *iter, void __rcu **slot) 2016 { 2017 if (__radix_tree_delete(root, iter->node, slot)) 2018 iter->index = iter->next_index; 2019 } 2020 EXPORT_SYMBOL(radix_tree_iter_delete); 2021 2022 /** 2023 * radix_tree_delete_item - delete an item from a radix tree 2024 * @root: radix tree root 2025 * @index: index key 2026 * @item: expected item 2027 * 2028 * Remove @item at @index from the radix tree rooted at @root. 2029 * 2030 * Return: the deleted entry, or %NULL if it was not present 2031 * or the entry at the given @index was not @item. 2032 */ 2033 void *radix_tree_delete_item(struct radix_tree_root *root, 2034 unsigned long index, void *item) 2035 { 2036 struct radix_tree_node *node = NULL; 2037 void __rcu **slot = NULL; 2038 void *entry; 2039 2040 entry = __radix_tree_lookup(root, index, &node, &slot); 2041 if (!slot) 2042 return NULL; 2043 if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, 2044 get_slot_offset(node, slot)))) 2045 return NULL; 2046 2047 if (item && entry != item) 2048 return NULL; 2049 2050 __radix_tree_delete(root, node, slot); 2051 2052 return entry; 2053 } 2054 EXPORT_SYMBOL(radix_tree_delete_item); 2055 2056 /** 2057 * radix_tree_delete - delete an entry from a radix tree 2058 * @root: radix tree root 2059 * @index: index key 2060 * 2061 * Remove the entry at @index from the radix tree rooted at @root. 2062 * 2063 * Return: The deleted entry, or %NULL if it was not present. 2064 */ 2065 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 2066 { 2067 return radix_tree_delete_item(root, index, NULL); 2068 } 2069 EXPORT_SYMBOL(radix_tree_delete); 2070 2071 void radix_tree_clear_tags(struct radix_tree_root *root, 2072 struct radix_tree_node *node, 2073 void __rcu **slot) 2074 { 2075 if (node) { 2076 unsigned int tag, offset = get_slot_offset(node, slot); 2077 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 2078 node_tag_clear(root, node, tag, offset); 2079 } else { 2080 root_tag_clear_all(root); 2081 } 2082 } 2083 2084 /** 2085 * radix_tree_tagged - test whether any items in the tree are tagged 2086 * @root: radix tree root 2087 * @tag: tag to test 2088 */ 2089 int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) 2090 { 2091 return root_tag_get(root, tag); 2092 } 2093 EXPORT_SYMBOL(radix_tree_tagged); 2094 2095 /** 2096 * idr_preload - preload for idr_alloc() 2097 * @gfp_mask: allocation mask to use for preloading 2098 * 2099 * Preallocate memory to use for the next call to idr_alloc(). This function 2100 * returns with preemption disabled. It will be enabled by idr_preload_end(). 2101 */ 2102 void idr_preload(gfp_t gfp_mask) 2103 { 2104 if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) 2105 preempt_disable(); 2106 } 2107 EXPORT_SYMBOL(idr_preload); 2108 2109 int ida_pre_get(struct ida *ida, gfp_t gfp) 2110 { 2111 /* 2112 * The IDA API has no preload_end() equivalent. Instead, 2113 * ida_get_new() can return -EAGAIN, prompting the caller 2114 * to return to the ida_pre_get() step. 2115 */ 2116 if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE)) 2117 preempt_enable(); 2118 2119 if (!this_cpu_read(ida_bitmap)) { 2120 struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); 2121 if (!bitmap) 2122 return 0; 2123 if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) 2124 kfree(bitmap); 2125 } 2126 2127 return 1; 2128 } 2129 2130 void __rcu **idr_get_free(struct radix_tree_root *root, 2131 struct radix_tree_iter *iter, gfp_t gfp, 2132 unsigned long max) 2133 { 2134 struct radix_tree_node *node = NULL, *child; 2135 void __rcu **slot = (void __rcu **)&root->rnode; 2136 unsigned long maxindex, start = iter->next_index; 2137 unsigned int shift, offset = 0; 2138 2139 grow: 2140 shift = radix_tree_load_root(root, &child, &maxindex); 2141 if (!radix_tree_tagged(root, IDR_FREE)) 2142 start = max(start, maxindex + 1); 2143 if (start > max) 2144 return ERR_PTR(-ENOSPC); 2145 2146 if (start > maxindex) { 2147 int error = radix_tree_extend(root, gfp, start, shift); 2148 if (error < 0) 2149 return ERR_PTR(error); 2150 shift = error; 2151 child = rcu_dereference_raw(root->rnode); 2152 } 2153 2154 while (shift) { 2155 shift -= RADIX_TREE_MAP_SHIFT; 2156 if (child == NULL) { 2157 /* Have to add a child node. */ 2158 child = radix_tree_node_alloc(gfp, node, root, shift, 2159 offset, 0, 0); 2160 if (!child) 2161 return ERR_PTR(-ENOMEM); 2162 all_tag_set(child, IDR_FREE); 2163 rcu_assign_pointer(*slot, node_to_entry(child)); 2164 if (node) 2165 node->count++; 2166 } else if (!radix_tree_is_internal_node(child)) 2167 break; 2168 2169 node = entry_to_node(child); 2170 offset = radix_tree_descend(node, &child, start); 2171 if (!tag_get(node, IDR_FREE, offset)) { 2172 offset = radix_tree_find_next_bit(node, IDR_FREE, 2173 offset + 1); 2174 start = next_index(start, node, offset); 2175 if (start > max) 2176 return ERR_PTR(-ENOSPC); 2177 while (offset == RADIX_TREE_MAP_SIZE) { 2178 offset = node->offset + 1; 2179 node = node->parent; 2180 if (!node) 2181 goto grow; 2182 shift = node->shift; 2183 } 2184 child = rcu_dereference_raw(node->slots[offset]); 2185 } 2186 slot = &node->slots[offset]; 2187 } 2188 2189 iter->index = start; 2190 if (node) 2191 iter->next_index = 1 + min(max, (start | node_maxindex(node))); 2192 else 2193 iter->next_index = 1; 2194 iter->node = node; 2195 __set_iter_shift(iter, shift); 2196 set_iter_tags(iter, node, offset, IDR_FREE); 2197 2198 return slot; 2199 } 2200 2201 /** 2202 * idr_destroy - release all internal memory from an IDR 2203 * @idr: idr handle 2204 * 2205 * After this function is called, the IDR is empty, and may be reused or 2206 * the data structure containing it may be freed. 2207 * 2208 * A typical clean-up sequence for objects stored in an idr tree will use 2209 * idr_for_each() to free all objects, if necessary, then idr_destroy() to 2210 * free the memory used to keep track of those objects. 2211 */ 2212 void idr_destroy(struct idr *idr) 2213 { 2214 struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode); 2215 if (radix_tree_is_internal_node(node)) 2216 radix_tree_free_nodes(node); 2217 idr->idr_rt.rnode = NULL; 2218 root_tag_set(&idr->idr_rt, IDR_FREE); 2219 } 2220 EXPORT_SYMBOL(idr_destroy); 2221 2222 static void 2223 radix_tree_node_ctor(void *arg) 2224 { 2225 struct radix_tree_node *node = arg; 2226 2227 memset(node, 0, sizeof(*node)); 2228 INIT_LIST_HEAD(&node->private_list); 2229 } 2230 2231 static __init unsigned long __maxindex(unsigned int height) 2232 { 2233 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 2234 int shift = RADIX_TREE_INDEX_BITS - width; 2235 2236 if (shift < 0) 2237 return ~0UL; 2238 if (shift >= BITS_PER_LONG) 2239 return 0UL; 2240 return ~0UL >> shift; 2241 } 2242 2243 static __init void radix_tree_init_maxnodes(void) 2244 { 2245 unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1]; 2246 unsigned int i, j; 2247 2248 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 2249 height_to_maxindex[i] = __maxindex(i); 2250 for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) { 2251 for (j = i; j > 0; j--) 2252 height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1; 2253 } 2254 } 2255 2256 static int radix_tree_cpu_dead(unsigned int cpu) 2257 { 2258 struct radix_tree_preload *rtp; 2259 struct radix_tree_node *node; 2260 2261 /* Free per-cpu pool of preloaded nodes */ 2262 rtp = &per_cpu(radix_tree_preloads, cpu); 2263 while (rtp->nr) { 2264 node = rtp->nodes; 2265 rtp->nodes = node->parent; 2266 kmem_cache_free(radix_tree_node_cachep, node); 2267 rtp->nr--; 2268 } 2269 kfree(per_cpu(ida_bitmap, cpu)); 2270 per_cpu(ida_bitmap, cpu) = NULL; 2271 return 0; 2272 } 2273 2274 void __init radix_tree_init(void) 2275 { 2276 int ret; 2277 2278 BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); 2279 BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); 2280 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 2281 sizeof(struct radix_tree_node), 0, 2282 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 2283 radix_tree_node_ctor); 2284 radix_tree_init_maxnodes(); 2285 ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", 2286 NULL, radix_tree_cpu_dead); 2287 WARN_ON(ret < 0); 2288 } 2289