/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 * Copyright (C) 2016 Intel, Matthew Wilcox
 * Copyright (C) 2016 Intel, Ross Zwisler
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
#include <linux/preempt.h>		/* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/xarray.h>


/*
 * Radix tree node cache.
 */
struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)

/*
 * The IDR does not have to be as high as the radix tree since it uses
 * signed integers, not unsigned longs.
 */
#define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
#define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)

/*
 * The IDA is even shorter since it uses a bitmap at the last level.
 */
#define IDA_INDEX_BITS	(8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
#define IDA_MAX_PATH		(DIV_ROUND_UP(IDA_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	/* Number of nodes currently in the pool */
	unsigned nr;
	/* nodes->parent points to next preallocated node */
	struct radix_tree_node *nodes;
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

/* Strip the internal-node marker bit to recover the node pointer. */
static inline struct radix_tree_node *entry_to_node(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
}

/* Tag a node pointer as an internal entry before storing it in a slot. */
static inline void *node_to_entry(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}

#define RADIX_TREE_RETRY	XA_RETRY_ENTRY

/*
 * Offset of @slot within @parent's slots[] array; 0 when there is no
 * parent (i.e. the slot is the root's xa_head).
 */
static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
{
	return parent ? slot - parent->slots : 0;
}

/*
 * Descend one level from @parent towards @index: store the child entry in
 * *@nodep and return the slot offset that was followed.
 */
static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
			struct radix_tree_node **nodep, unsigned long index)
{
	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

	*nodep = (void *)entry;
	return offset;
}

/* The gfp mask is stored in the low bits of xa_flags (zone bits excluded). */
static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
	return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

/* Root tags live in the high bits of xa_flags, above ROOT_TAG_SHIFT. */
static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
	root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}
static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
	root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}

/* Clear every root tag, leaving the gfp-mask bits below ROOT_TAG_SHIFT. */
static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
}

static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
	return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
}

/* All root tags as a bitmask, shifted down to bit 0. */
static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
	return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
}

/* True if this root belongs to an IDR rather than a plain radix tree. */
static inline bool is_idr(const struct radix_tree_root *root)
{
	return !!(root->xa_flags & ROOT_IS_IDR);
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(const struct radix_tree_node *node,
							unsigned int tag)
{
	unsigned idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

/* Set @tag on every slot of @node. */
static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @node: The node whose tag bitmap to search
 * @tag: The tag index selecting which bitmap to search
 * @offset: The bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or RADIX_TREE_MAP_SIZE if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
			 unsigned long offset)
{
	const unsigned long *addr = node->tags[tag];

	if (offset < RADIX_TREE_MAP_SIZE) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		/* Round up to the next word boundary and scan whole words. */
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < RADIX_TREE_MAP_SIZE) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return RADIX_TREE_MAP_SIZE;
}

/* Slot offset within the current node for this iterator position. */
static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
	return iter->index & RADIX_TREE_MAP_MASK;
}

/*
 * The maximum index which can be stored in a radix tree
 */
static inline unsigned long shift_maxindex(unsigned int shift)
{
	return (RADIX_TREE_MAP_SIZE << shift) - 1;
}
23835534c86SMatthew Wilcox static inline unsigned long node_maxindex(const struct radix_tree_node *node) 239218ed750SMatthew Wilcox { 240218ed750SMatthew Wilcox return shift_maxindex(node->shift); 241218ed750SMatthew Wilcox } 242218ed750SMatthew Wilcox 2430a835c4fSMatthew Wilcox static unsigned long next_index(unsigned long index, 2440a835c4fSMatthew Wilcox const struct radix_tree_node *node, 2450a835c4fSMatthew Wilcox unsigned long offset) 2460a835c4fSMatthew Wilcox { 2470a835c4fSMatthew Wilcox return (index & ~node_maxindex(node)) + (offset << node->shift); 2480a835c4fSMatthew Wilcox } 2490a835c4fSMatthew Wilcox 2501da177e4SLinus Torvalds /* 2511da177e4SLinus Torvalds * This assumes that the caller has performed appropriate preallocation, and 2521da177e4SLinus Torvalds * that the caller has pinned this thread of control to the current CPU. 2531da177e4SLinus Torvalds */ 2541da177e4SLinus Torvalds static struct radix_tree_node * 2550a835c4fSMatthew Wilcox radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, 256d58275bcSMatthew Wilcox struct radix_tree_root *root, 257e8de4340SMatthew Wilcox unsigned int shift, unsigned int offset, 25801959dfeSMatthew Wilcox unsigned int count, unsigned int nr_values) 2591da177e4SLinus Torvalds { 260e2848a0eSNick Piggin struct radix_tree_node *ret = NULL; 2611da177e4SLinus Torvalds 2625e4c0d97SJan Kara /* 2632fcd9005SMatthew Wilcox * Preload code isn't irq safe and it doesn't make sense to use 2642fcd9005SMatthew Wilcox * preloading during an interrupt anyway as all the allocations have 2652fcd9005SMatthew Wilcox * to be atomic. So just do normal allocation when in interrupt. 
2665e4c0d97SJan Kara */ 267d0164adcSMel Gorman if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { 2681da177e4SLinus Torvalds struct radix_tree_preload *rtp; 2691da177e4SLinus Torvalds 270e2848a0eSNick Piggin /* 27158e698afSVladimir Davydov * Even if the caller has preloaded, try to allocate from the 27205eb6e72SVladimir Davydov * cache first for the new node to get accounted to the memory 27305eb6e72SVladimir Davydov * cgroup. 27458e698afSVladimir Davydov */ 27558e698afSVladimir Davydov ret = kmem_cache_alloc(radix_tree_node_cachep, 27605eb6e72SVladimir Davydov gfp_mask | __GFP_NOWARN); 27758e698afSVladimir Davydov if (ret) 27858e698afSVladimir Davydov goto out; 27958e698afSVladimir Davydov 28058e698afSVladimir Davydov /* 281e2848a0eSNick Piggin * Provided the caller has preloaded here, we will always 282e2848a0eSNick Piggin * succeed in getting a node here (and never reach 283e2848a0eSNick Piggin * kmem_cache_alloc) 284e2848a0eSNick Piggin */ 2857c8e0181SChristoph Lameter rtp = this_cpu_ptr(&radix_tree_preloads); 2861da177e4SLinus Torvalds if (rtp->nr) { 2879d2a8da0SKirill A. Shutemov ret = rtp->nodes; 2881293d5c5SMatthew Wilcox rtp->nodes = ret->parent; 2891da177e4SLinus Torvalds rtp->nr--; 2901da177e4SLinus Torvalds } 291ce80b067SCatalin Marinas /* 292ce80b067SCatalin Marinas * Update the allocation stack trace as this is more useful 293ce80b067SCatalin Marinas * for debugging. 
294ce80b067SCatalin Marinas */ 295ce80b067SCatalin Marinas kmemleak_update_trace(ret); 29658e698afSVladimir Davydov goto out; 2971da177e4SLinus Torvalds } 29805eb6e72SVladimir Davydov ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 29958e698afSVladimir Davydov out: 300b194d16cSMatthew Wilcox BUG_ON(radix_tree_is_internal_node(ret)); 301e8de4340SMatthew Wilcox if (ret) { 302e8de4340SMatthew Wilcox ret->shift = shift; 303e8de4340SMatthew Wilcox ret->offset = offset; 304e8de4340SMatthew Wilcox ret->count = count; 30501959dfeSMatthew Wilcox ret->nr_values = nr_values; 306d58275bcSMatthew Wilcox ret->parent = parent; 30701959dfeSMatthew Wilcox ret->array = root; 308e8de4340SMatthew Wilcox } 3091da177e4SLinus Torvalds return ret; 3101da177e4SLinus Torvalds } 3111da177e4SLinus Torvalds 31258d6ea30SMatthew Wilcox void radix_tree_node_rcu_free(struct rcu_head *head) 3137cf9c2c7SNick Piggin { 3147cf9c2c7SNick Piggin struct radix_tree_node *node = 3157cf9c2c7SNick Piggin container_of(head, struct radix_tree_node, rcu_head); 316643b52b9SNick Piggin 317643b52b9SNick Piggin /* 318175542f5SMatthew Wilcox * Must only free zeroed nodes into the slab. We can be left with 319175542f5SMatthew Wilcox * non-NULL entries by radix_tree_free_nodes, so clear the entries 320175542f5SMatthew Wilcox * and tags here. 
321643b52b9SNick Piggin */ 322175542f5SMatthew Wilcox memset(node->slots, 0, sizeof(node->slots)); 323175542f5SMatthew Wilcox memset(node->tags, 0, sizeof(node->tags)); 32491d9c05aSMatthew Wilcox INIT_LIST_HEAD(&node->private_list); 325643b52b9SNick Piggin 3267cf9c2c7SNick Piggin kmem_cache_free(radix_tree_node_cachep, node); 3277cf9c2c7SNick Piggin } 3287cf9c2c7SNick Piggin 3291da177e4SLinus Torvalds static inline void 3301da177e4SLinus Torvalds radix_tree_node_free(struct radix_tree_node *node) 3311da177e4SLinus Torvalds { 3327cf9c2c7SNick Piggin call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 3331da177e4SLinus Torvalds } 3341da177e4SLinus Torvalds 3351da177e4SLinus Torvalds /* 3361da177e4SLinus Torvalds * Load up this CPU's radix_tree_node buffer with sufficient objects to 3371da177e4SLinus Torvalds * ensure that the addition of a single element in the tree cannot fail. On 3381da177e4SLinus Torvalds * success, return zero, with preemption disabled. On error, return -ENOMEM 3391da177e4SLinus Torvalds * with preemption not disabled. 340b34df792SDavid Howells * 341b34df792SDavid Howells * To make use of this facility, the radix tree must be initialised without 342d0164adcSMel Gorman * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 3431da177e4SLinus Torvalds */ 344bc9ae224SEric Dumazet static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) 3451da177e4SLinus Torvalds { 3461da177e4SLinus Torvalds struct radix_tree_preload *rtp; 3471da177e4SLinus Torvalds struct radix_tree_node *node; 3481da177e4SLinus Torvalds int ret = -ENOMEM; 3491da177e4SLinus Torvalds 35005eb6e72SVladimir Davydov /* 35105eb6e72SVladimir Davydov * Nodes preloaded by one cgroup can be be used by another cgroup, so 35205eb6e72SVladimir Davydov * they should never be accounted to any particular memory cgroup. 
35305eb6e72SVladimir Davydov */ 35405eb6e72SVladimir Davydov gfp_mask &= ~__GFP_ACCOUNT; 35505eb6e72SVladimir Davydov 3561da177e4SLinus Torvalds preempt_disable(); 3577c8e0181SChristoph Lameter rtp = this_cpu_ptr(&radix_tree_preloads); 358c78c66d1SKirill A. Shutemov while (rtp->nr < nr) { 3591da177e4SLinus Torvalds preempt_enable(); 360488514d1SChristoph Lameter node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 3611da177e4SLinus Torvalds if (node == NULL) 3621da177e4SLinus Torvalds goto out; 3631da177e4SLinus Torvalds preempt_disable(); 3647c8e0181SChristoph Lameter rtp = this_cpu_ptr(&radix_tree_preloads); 365c78c66d1SKirill A. Shutemov if (rtp->nr < nr) { 3661293d5c5SMatthew Wilcox node->parent = rtp->nodes; 3679d2a8da0SKirill A. Shutemov rtp->nodes = node; 3689d2a8da0SKirill A. Shutemov rtp->nr++; 3699d2a8da0SKirill A. Shutemov } else { 3701da177e4SLinus Torvalds kmem_cache_free(radix_tree_node_cachep, node); 3711da177e4SLinus Torvalds } 3729d2a8da0SKirill A. Shutemov } 3731da177e4SLinus Torvalds ret = 0; 3741da177e4SLinus Torvalds out: 3751da177e4SLinus Torvalds return ret; 3761da177e4SLinus Torvalds } 3775e4c0d97SJan Kara 3785e4c0d97SJan Kara /* 3795e4c0d97SJan Kara * Load up this CPU's radix_tree_node buffer with sufficient objects to 3805e4c0d97SJan Kara * ensure that the addition of a single element in the tree cannot fail. On 3815e4c0d97SJan Kara * success, return zero, with preemption disabled. On error, return -ENOMEM 3825e4c0d97SJan Kara * with preemption not disabled. 3835e4c0d97SJan Kara * 3845e4c0d97SJan Kara * To make use of this facility, the radix tree must be initialised without 385d0164adcSMel Gorman * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 3865e4c0d97SJan Kara */ 3875e4c0d97SJan Kara int radix_tree_preload(gfp_t gfp_mask) 3885e4c0d97SJan Kara { 3895e4c0d97SJan Kara /* Warn on non-sensical use... */ 390d0164adcSMel Gorman WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); 391c78c66d1SKirill A. 
Shutemov return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 3925e4c0d97SJan Kara } 393d7f0923dSDavid Chinner EXPORT_SYMBOL(radix_tree_preload); 3941da177e4SLinus Torvalds 3956e954b9eSNick Piggin /* 3965e4c0d97SJan Kara * The same as above function, except we don't guarantee preloading happens. 3975e4c0d97SJan Kara * We do it, if we decide it helps. On success, return zero with preemption 3985e4c0d97SJan Kara * disabled. On error, return -ENOMEM with preemption not disabled. 3995e4c0d97SJan Kara */ 4005e4c0d97SJan Kara int radix_tree_maybe_preload(gfp_t gfp_mask) 4015e4c0d97SJan Kara { 402d0164adcSMel Gorman if (gfpflags_allow_blocking(gfp_mask)) 403c78c66d1SKirill A. Shutemov return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 4045e4c0d97SJan Kara /* Preloading doesn't help anything with this gfp mask, skip it */ 4055e4c0d97SJan Kara preempt_disable(); 4065e4c0d97SJan Kara return 0; 4075e4c0d97SJan Kara } 4085e4c0d97SJan Kara EXPORT_SYMBOL(radix_tree_maybe_preload); 4095e4c0d97SJan Kara 41035534c86SMatthew Wilcox static unsigned radix_tree_load_root(const struct radix_tree_root *root, 4111456a439SMatthew Wilcox struct radix_tree_node **nodep, unsigned long *maxindex) 4121456a439SMatthew Wilcox { 413f8d5d0ccSMatthew Wilcox struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 4141456a439SMatthew Wilcox 4151456a439SMatthew Wilcox *nodep = node; 4161456a439SMatthew Wilcox 417b194d16cSMatthew Wilcox if (likely(radix_tree_is_internal_node(node))) { 4184dd6c098SMatthew Wilcox node = entry_to_node(node); 4191456a439SMatthew Wilcox *maxindex = node_maxindex(node); 420c12e51b0SMatthew Wilcox return node->shift + RADIX_TREE_MAP_SHIFT; 4211456a439SMatthew Wilcox } 4221456a439SMatthew Wilcox 4231456a439SMatthew Wilcox *maxindex = 0; 4241456a439SMatthew Wilcox return 0; 4251456a439SMatthew Wilcox } 4261456a439SMatthew Wilcox 4271da177e4SLinus Torvalds /* 4281da177e4SLinus Torvalds * Extend a radix tree so it can store key @index. 
4291da177e4SLinus Torvalds */ 4300a835c4fSMatthew Wilcox static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, 431d0891265SMatthew Wilcox unsigned long index, unsigned int shift) 4321da177e4SLinus Torvalds { 433d7b62727SMatthew Wilcox void *entry; 434d0891265SMatthew Wilcox unsigned int maxshift; 4351da177e4SLinus Torvalds int tag; 4361da177e4SLinus Torvalds 437d0891265SMatthew Wilcox /* Figure out what the shift should be. */ 438d0891265SMatthew Wilcox maxshift = shift; 439d0891265SMatthew Wilcox while (index > shift_maxindex(maxshift)) 440d0891265SMatthew Wilcox maxshift += RADIX_TREE_MAP_SHIFT; 4411da177e4SLinus Torvalds 442f8d5d0ccSMatthew Wilcox entry = rcu_dereference_raw(root->xa_head); 443d7b62727SMatthew Wilcox if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) 4441da177e4SLinus Torvalds goto out; 4451da177e4SLinus Torvalds 4461da177e4SLinus Torvalds do { 4470a835c4fSMatthew Wilcox struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, 448d58275bcSMatthew Wilcox root, shift, 0, 1, 0); 4492fcd9005SMatthew Wilcox if (!node) 4501da177e4SLinus Torvalds return -ENOMEM; 4511da177e4SLinus Torvalds 4520a835c4fSMatthew Wilcox if (is_idr(root)) { 4530a835c4fSMatthew Wilcox all_tag_set(node, IDR_FREE); 4540a835c4fSMatthew Wilcox if (!root_tag_get(root, IDR_FREE)) { 4550a835c4fSMatthew Wilcox tag_clear(node, IDR_FREE, 0); 4560a835c4fSMatthew Wilcox root_tag_set(root, IDR_FREE); 4570a835c4fSMatthew Wilcox } 4580a835c4fSMatthew Wilcox } else { 4590a835c4fSMatthew Wilcox /* Propagate the aggregated tag info to the new child */ 460daff89f3SJonathan Corbet for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 461612d6c19SNick Piggin if (root_tag_get(root, tag)) 4621da177e4SLinus Torvalds tag_set(node, tag, 0); 4631da177e4SLinus Torvalds } 4640a835c4fSMatthew Wilcox } 4651da177e4SLinus Torvalds 466d0891265SMatthew Wilcox BUG_ON(shift > BITS_PER_LONG); 467d7b62727SMatthew Wilcox if (radix_tree_is_internal_node(entry)) { 468d7b62727SMatthew 
Wilcox entry_to_node(entry)->parent = node; 4693159f943SMatthew Wilcox } else if (xa_is_value(entry)) { 47001959dfeSMatthew Wilcox /* Moving a value entry root->xa_head to a node */ 47101959dfeSMatthew Wilcox node->nr_values = 1; 472f7942430SJohannes Weiner } 473d7b62727SMatthew Wilcox /* 474d7b62727SMatthew Wilcox * entry was already in the radix tree, so we do not need 475d7b62727SMatthew Wilcox * rcu_assign_pointer here 476d7b62727SMatthew Wilcox */ 477d7b62727SMatthew Wilcox node->slots[0] = (void __rcu *)entry; 478d7b62727SMatthew Wilcox entry = node_to_entry(node); 479f8d5d0ccSMatthew Wilcox rcu_assign_pointer(root->xa_head, entry); 480d0891265SMatthew Wilcox shift += RADIX_TREE_MAP_SHIFT; 481d0891265SMatthew Wilcox } while (shift <= maxshift); 4821da177e4SLinus Torvalds out: 483d0891265SMatthew Wilcox return maxshift + RADIX_TREE_MAP_SHIFT; 4841da177e4SLinus Torvalds } 4851da177e4SLinus Torvalds 4861da177e4SLinus Torvalds /** 487f4b109c6SJohannes Weiner * radix_tree_shrink - shrink radix tree to minimum height 488f4b109c6SJohannes Weiner * @root radix tree root 489f4b109c6SJohannes Weiner */ 4901cf56f9dSMatthew Wilcox static inline bool radix_tree_shrink(struct radix_tree_root *root) 491f4b109c6SJohannes Weiner { 4920ac398efSMatthew Wilcox bool shrunk = false; 4930ac398efSMatthew Wilcox 494f4b109c6SJohannes Weiner for (;;) { 495f8d5d0ccSMatthew Wilcox struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 496f4b109c6SJohannes Weiner struct radix_tree_node *child; 497f4b109c6SJohannes Weiner 498f4b109c6SJohannes Weiner if (!radix_tree_is_internal_node(node)) 499f4b109c6SJohannes Weiner break; 500f4b109c6SJohannes Weiner node = entry_to_node(node); 501f4b109c6SJohannes Weiner 502f4b109c6SJohannes Weiner /* 503f4b109c6SJohannes Weiner * The candidate node has more than one child, or its child 5043a08cd52SMatthew Wilcox * is not at the leftmost slot, we cannot shrink. 
505f4b109c6SJohannes Weiner */ 506f4b109c6SJohannes Weiner if (node->count != 1) 507f4b109c6SJohannes Weiner break; 50812320d0fSMatthew Wilcox child = rcu_dereference_raw(node->slots[0]); 509f4b109c6SJohannes Weiner if (!child) 510f4b109c6SJohannes Weiner break; 511f4b109c6SJohannes Weiner 51266ee620fSMatthew Wilcox /* 51366ee620fSMatthew Wilcox * For an IDR, we must not shrink entry 0 into the root in 51466ee620fSMatthew Wilcox * case somebody calls idr_replace() with a pointer that 51566ee620fSMatthew Wilcox * appears to be an internal entry 51666ee620fSMatthew Wilcox */ 51766ee620fSMatthew Wilcox if (!node->shift && is_idr(root)) 51866ee620fSMatthew Wilcox break; 51966ee620fSMatthew Wilcox 520f4b109c6SJohannes Weiner if (radix_tree_is_internal_node(child)) 521f4b109c6SJohannes Weiner entry_to_node(child)->parent = NULL; 522f4b109c6SJohannes Weiner 523f4b109c6SJohannes Weiner /* 524f4b109c6SJohannes Weiner * We don't need rcu_assign_pointer(), since we are simply 525f4b109c6SJohannes Weiner * moving the node from one part of the tree to another: if it 526f4b109c6SJohannes Weiner * was safe to dereference the old pointer to it 527f4b109c6SJohannes Weiner * (node->slots[0]), it will be safe to dereference the new 528f8d5d0ccSMatthew Wilcox * one (root->xa_head) as far as dependent read barriers go. 529f4b109c6SJohannes Weiner */ 530f8d5d0ccSMatthew Wilcox root->xa_head = (void __rcu *)child; 5310a835c4fSMatthew Wilcox if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) 5320a835c4fSMatthew Wilcox root_tag_clear(root, IDR_FREE); 533f4b109c6SJohannes Weiner 534f4b109c6SJohannes Weiner /* 535f4b109c6SJohannes Weiner * We have a dilemma here. The node's slot[0] must not be 536f4b109c6SJohannes Weiner * NULLed in case there are concurrent lookups expecting to 537f4b109c6SJohannes Weiner * find the item. 
However if this was a bottom-level node, 538f4b109c6SJohannes Weiner * then it may be subject to the slot pointer being visible 539f4b109c6SJohannes Weiner * to callers dereferencing it. If item corresponding to 540f4b109c6SJohannes Weiner * slot[0] is subsequently deleted, these callers would expect 541f4b109c6SJohannes Weiner * their slot to become empty sooner or later. 542f4b109c6SJohannes Weiner * 543f4b109c6SJohannes Weiner * For example, lockless pagecache will look up a slot, deref 544f4b109c6SJohannes Weiner * the page pointer, and if the page has 0 refcount it means it 545f4b109c6SJohannes Weiner * was concurrently deleted from pagecache so try the deref 546f4b109c6SJohannes Weiner * again. Fortunately there is already a requirement for logic 547f4b109c6SJohannes Weiner * to retry the entire slot lookup -- the indirect pointer 548f4b109c6SJohannes Weiner * problem (replacing direct root node with an indirect pointer 549f4b109c6SJohannes Weiner * also results in a stale slot). So tag the slot as indirect 550f4b109c6SJohannes Weiner * to force callers to retry. 
551f4b109c6SJohannes Weiner */ 5524d693d08SJohannes Weiner node->count = 0; 5534d693d08SJohannes Weiner if (!radix_tree_is_internal_node(child)) { 554d7b62727SMatthew Wilcox node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; 5554d693d08SJohannes Weiner } 556f4b109c6SJohannes Weiner 557ea07b862SJohannes Weiner WARN_ON_ONCE(!list_empty(&node->private_list)); 558f4b109c6SJohannes Weiner radix_tree_node_free(node); 5590ac398efSMatthew Wilcox shrunk = true; 560f4b109c6SJohannes Weiner } 561f4b109c6SJohannes Weiner 5620ac398efSMatthew Wilcox return shrunk; 5630ac398efSMatthew Wilcox } 5640ac398efSMatthew Wilcox 5650ac398efSMatthew Wilcox static bool delete_node(struct radix_tree_root *root, 5661cf56f9dSMatthew Wilcox struct radix_tree_node *node) 567f4b109c6SJohannes Weiner { 5680ac398efSMatthew Wilcox bool deleted = false; 5690ac398efSMatthew Wilcox 570f4b109c6SJohannes Weiner do { 571f4b109c6SJohannes Weiner struct radix_tree_node *parent; 572f4b109c6SJohannes Weiner 573f4b109c6SJohannes Weiner if (node->count) { 57412320d0fSMatthew Wilcox if (node_to_entry(node) == 575f8d5d0ccSMatthew Wilcox rcu_dereference_raw(root->xa_head)) 5761cf56f9dSMatthew Wilcox deleted |= radix_tree_shrink(root); 5770ac398efSMatthew Wilcox return deleted; 578f4b109c6SJohannes Weiner } 579f4b109c6SJohannes Weiner 580f4b109c6SJohannes Weiner parent = node->parent; 581f4b109c6SJohannes Weiner if (parent) { 582f4b109c6SJohannes Weiner parent->slots[node->offset] = NULL; 583f4b109c6SJohannes Weiner parent->count--; 584f4b109c6SJohannes Weiner } else { 5850a835c4fSMatthew Wilcox /* 5860a835c4fSMatthew Wilcox * Shouldn't the tags already have all been cleared 5870a835c4fSMatthew Wilcox * by the caller? 
5880a835c4fSMatthew Wilcox */ 5890a835c4fSMatthew Wilcox if (!is_idr(root)) 590f4b109c6SJohannes Weiner root_tag_clear_all(root); 591f8d5d0ccSMatthew Wilcox root->xa_head = NULL; 592f4b109c6SJohannes Weiner } 593f4b109c6SJohannes Weiner 594ea07b862SJohannes Weiner WARN_ON_ONCE(!list_empty(&node->private_list)); 595f4b109c6SJohannes Weiner radix_tree_node_free(node); 5960ac398efSMatthew Wilcox deleted = true; 597f4b109c6SJohannes Weiner 598f4b109c6SJohannes Weiner node = parent; 599f4b109c6SJohannes Weiner } while (node); 6000ac398efSMatthew Wilcox 6010ac398efSMatthew Wilcox return deleted; 602f4b109c6SJohannes Weiner } 603f4b109c6SJohannes Weiner 604f4b109c6SJohannes Weiner /** 605139e5616SJohannes Weiner * __radix_tree_create - create a slot in a radix tree 6061da177e4SLinus Torvalds * @root: radix tree root 6071da177e4SLinus Torvalds * @index: index key 608139e5616SJohannes Weiner * @nodep: returns node 609139e5616SJohannes Weiner * @slotp: returns slot 6101da177e4SLinus Torvalds * 611139e5616SJohannes Weiner * Create, if necessary, and return the node and slot for an item 612139e5616SJohannes Weiner * at position @index in the radix tree @root. 613139e5616SJohannes Weiner * 614139e5616SJohannes Weiner * Until there is more than one item in the tree, no nodes are 615f8d5d0ccSMatthew Wilcox * allocated and @root->xa_head is used as a direct slot instead of 616139e5616SJohannes Weiner * pointing to a node, in which case *@nodep will be NULL. 617139e5616SJohannes Weiner * 618139e5616SJohannes Weiner * Returns -ENOMEM, or 0 for success. 
 */
static int __radix_tree_create(struct radix_tree_root *root,
			unsigned long index, struct radix_tree_node **nodep,
			void __rcu ***slotp)
{
	struct radix_tree_node *node = NULL, *child;
	/* Start from the root slot; may stay there for a single-entry tree */
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex;
	unsigned int shift, offset = 0;
	unsigned long max = index;
	gfp_t gfp = root_gfp_mask(root);

	shift = radix_tree_load_root(root, &child, &maxindex);

	/* Make sure the tree is high enough. */
	if (max > maxindex) {
		int error = radix_tree_extend(root, gfp, max, shift);
		if (error < 0)
			return error;
		/* radix_tree_extend() returns the new top-level shift */
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}

	while (shift > 0) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node.  */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return -ENOMEM;
			/*
			 * Publish the new node before bumping the parent's
			 * count so concurrent RCU readers never see an
			 * unreachable slot accounted for.
			 */
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		/* Go a level down */
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);
		slot = &node->slots[offset];
	}

	if (nodep)
		*nodep = node;
	if (slotp)
		*slotp = slot;
	return 0;
}

/*
 * Free any nodes below this node.  The tree is presumed to not need
 * shrinking, and any user data in the tree is presumed to not need a
 * destructor called on it.  If we need to add a destructor, we can
 * add that functionality later.  Note that we may not clear tags or
 * slots from the tree as an RCU walker may still have a pointer into
 * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
 * but we'll still have to clear those in rcu_free.
 */
static void radix_tree_free_nodes(struct radix_tree_node *node)
{
	unsigned offset = 0;
	struct radix_tree_node *child = entry_to_node(node);

	/* Iterative post-order walk: descend into internal entries first */
	for (;;) {
		void *entry = rcu_dereference_raw(child->slots[offset]);
		if (xa_is_node(entry) && child->shift) {
			child = entry_to_node(entry);
			offset = 0;
			continue;
		}
		offset++;
		/* Finished this node's slots: free it and pop up to the parent */
		while (offset == RADIX_TREE_MAP_SIZE) {
			struct radix_tree_node *old = child;
			offset = child->offset + 1;
			child = child->parent;
			WARN_ON_ONCE(!list_empty(&old->private_list));
			radix_tree_node_free(old);
			if (old == entry_to_node(node))
				return;
		}
	}
}

/*
 * Store @item in @slot if it is empty.  Returns -EEXIST if the slot is
 * occupied, otherwise 1 (the number of entries inserted).  Adjusts the
 * node's entry and value-entry accounting.
 */
static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, bool replace)
{
	if (*slot)
		return -EEXIST;
	rcu_assign_pointer(*slot, item);
	if (node) {
		node->count++;
		if (xa_is_value(item))
			node->nr_values++;
	}
	return 1;
}

/**
 *	__radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
			void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	/* Internal entries must never be stored as user items */
	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, &node, &slot);
	if (error)
		return error;

	error = insert_entries(node, slot, item, false);
	if (error < 0)
		return error;

	/* A freshly inserted slot must not carry any stale tags */
	if (node) {
		unsigned offset = get_slot_offset(node, slot);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
		BUG_ON(tag_get(node, 2, offset));
	} else {
		BUG_ON(root_tags_get(root));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

/**
 *	__radix_tree_lookup	-	lookup an item in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@nodep:		returns node
 *	@slotp:		returns slot
 *
 *	Lookup and return the item at position @index in the radix
 *	tree @root.
 *
 *	Until there is more than one item in the tree, no nodes are
 *	allocated and @root->xa_head is used as a direct slot instead of
 *	pointing to a node, in which case *@nodep will be NULL.
 */
void *__radix_tree_lookup(const struct radix_tree_root *root,
			  unsigned long index, struct radix_tree_node **nodep,
			  void __rcu ***slotp)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	void __rcu **slot;

 restart:
	parent = NULL;
	slot = (void __rcu **)&root->xa_head;
	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		slot = parent->slots + offset;
		/*
		 * RADIX_TREE_RETRY means a concurrent modification moved
		 * this entry; restart the walk from the root.
		 */
		if (node == RADIX_TREE_RETRY)
			goto restart;
		if (parent->shift == 0)
			break;
	}

	if (nodep)
		*nodep = parent;
	if (slotp)
		*slotp = slot;
	return node;
}

/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers. Any dereference of the slot must be done
 *	using radix_tree_deref_slot.
 */
void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
				unsigned long index)
{
	void __rcu **slot;

	if (!__radix_tree_lookup(root, index, NULL, &slot))
		return NULL;
	return slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{
	return __radix_tree_lookup(root, index, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_lookup);

/*
 * Store @item in @slot, updating the owning node's entry count by @count
 * and its value-entry count by @values.  The accounting is done before
 * publication; slot itself is updated with rcu_assign_pointer() so
 * concurrent readers see a consistent entry.
 */
static void replace_slot(void __rcu **slot, void *item,
		struct radix_tree_node *node, int count, int values)
{
	if (node && (count || values)) {
		node->count += count;
		node->nr_values += values;
	}

	rcu_assign_pointer(*slot, item);
}

/*
 * Read a tag either from @node or, for a node-less (single entry) tree,
 * from the root itself.
 */
static bool node_tag_get(const struct radix_tree_root *root,
				const struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	if (node)
		return tag_get(node, tag, offset);
	return root_tag_get(root, tag);
}

/*
 * IDR users want to be able to store NULL in the tree, so if the slot isn't
 * free, don't adjust the count, even if it's transitioning between NULL and
 * non-NULL.
 * For the IDA, we mark slots as being IDR_FREE while they still
 * have empty bits, but it only stores NULL in slots when they're being
 * deleted.
 */
static int calculate_count(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot,
				void *item, void *old)
{
	if (is_idr(root)) {
		unsigned offset = get_slot_offset(node, slot);
		bool free = node_tag_get(root, node, IDR_FREE, offset);
		/* Occupied IDR slot: NULL<->non-NULL transitions don't count */
		if (!free)
			return 0;
		if (!old)
			return 1;
	}
	/* Generic case: +1 when filling an empty slot, -1 when emptying one */
	return !!item - !!old;
}

/**
 * __radix_tree_replace		- replace item in a slot
 * @root:		radix tree root
 * @node:		pointer to tree node
 * @slot:		pointer to slot in @node
 * @item:		new item to store in the slot.
 *
 * For use with __radix_tree_lookup().  Caller must hold tree write locked
 * across slot lookup and replacement.
 */
void __radix_tree_replace(struct radix_tree_root *root,
			  struct radix_tree_node *node,
			  void __rcu **slot, void *item)
{
	void *old = rcu_dereference_raw(*slot);
	int values = !!xa_is_value(item) - !!xa_is_value(old);
	int count = calculate_count(root, node, slot, item, old);

	/*
	 * This function supports replacing value entries and
	 * deleting entries, but that needs accounting against the
	 * node unless the slot is root->xa_head.
	 */
	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
			(count || values));
	replace_slot(slot, item, node, count, values);

	if (!node)
		return;

	/* The node may now be empty; let delete_node() prune/shrink the tree */
	delete_node(root, node);
}

/**
 * radix_tree_replace_slot	- replace item in a slot
 * @root:	radix tree root
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_lookup_slot() and
 * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
 * across slot lookup and replacement.
 *
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and value entries, as that requires accounting
 * inside the radix tree node.  When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
 * radix_tree_iter_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
			void __rcu **slot, void *item)
{
	__radix_tree_replace(root, NULL, slot, item);
}
EXPORT_SYMBOL(radix_tree_replace_slot);

/**
 * radix_tree_iter_replace - replace item in a slot
 * @root:	radix tree root
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_for_each_slot().
 * Caller must hold tree write locked.
 */
void radix_tree_iter_replace(struct radix_tree_root *root,
				const struct radix_tree_iter *iter,
				void __rcu **slot, void *item)
{
	__radix_tree_replace(root, iter->node, slot, item);
}

/*
 * Propagate @tag for @offset from @node up to the root.  Stops early at
 * the first ancestor that already has the tag set, since the path above
 * it is then tagged already.
 */
static void node_tag_set(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (tag_get(node, tag, offset))
			return;
		tag_set(node, tag, offset);
		offset = node->offset;
		node = node->parent;
	}

	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);
}

/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  From
 *	the root all the way down to the leaf node.
 *
 *	Returns the address of the tagged item.  Setting a tag on a not-present
 *	item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	radix_tree_load_root(root, &node, &maxindex);
	BUG_ON(index > maxindex);

	/* Tag every level on the path from the root down to the leaf */
	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		BUG_ON(!node);

		if (!tag_get(parent, tag, offset))
			tag_set(parent, tag, offset);
	}

	/* set the root's tag bit */
	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_set);

/*
 * Clear @tag for @offset in @node and propagate the clearing upwards,
 * stopping as soon as an ancestor still has the tag set for any of its
 * other slots (any_tag_set()), since the path above must stay tagged.
 */
static void node_tag_clear(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (!tag_get(node, tag, offset))
			return;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			return;

		offset = node->offset;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);
}

/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If this causes
 *	the leaf node to have no tags set then clear the tag in the
 *	next-to-leaf node, etc.
 *
 *	Returns the address of the tagged item on success, else NULL.  ie:
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	/* offset is only read after the loop assigned it at least once */
	int uninitialized_var(offset);

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	parent = NULL;

	/* Walk to the leaf, remembering the last node/offset on the path */
	while (radix_tree_is_internal_node(node)) {
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
	}

	if (node)
		node_tag_clear(root, parent, tag, offset);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
  * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
  * @root: radix tree root
  * @iter: iterator state
  * @tag: tag to clear
  */
void radix_tree_iter_tag_clear(struct radix_tree_root *root,
			const struct radix_tree_iter *iter, unsigned int tag)
{
	node_tag_clear(root, iter->node, tag, iter_offset(iter));
}

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
 * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(const struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	/* Root tag clear means no entry anywhere carries this tag */
	if (!root_tag_get(root, tag))
		return 0;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return 0;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);

		if (!tag_get(parent, tag, offset))
			return 0;
		/* Entry being moved concurrently; the tag bit seen stands */
		if (node == RADIX_TREE_RETRY)
			break;
	}

	return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);

/* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
				struct radix_tree_node *node, unsigned offset,
				unsigned tag)
{
	unsigned tag_long = offset / BITS_PER_LONG;
	unsigned tag_bit  = offset % BITS_PER_LONG;

	/* Node-less (single entry) tree: the one entry is implicitly tagged */
	if (!node) {
		iter->tags = 1;
		return;
	}

	iter->tags = node->tags[tag][tag_long] >> tag_bit;

	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
		/* Pick tags from next element */
		if (tag_bit)
			iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
		/* Clip chunk size, here only BITS_PER_LONG tags */
		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
	}
}

/*
 * Reset the iterator so that the next call to radix_tree_next_chunk()
 * re-walks the tree from the slot after the current one.  Always returns
 * NULL so callers can use it directly as the loop cursor value.
 */
void __rcu **radix_tree_iter_resume(void __rcu **slot,
					struct radix_tree_iter *iter)
{
	slot++;
	iter->index = __radix_tree_iter_add(iter, 1);
	iter->next_index = iter->index;
	iter->tags = 0;
	return NULL;
}
EXPORT_SYMBOL(radix_tree_iter_resume);

/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node, *child;
	unsigned long index, offset, maxindex;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL. iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

 restart:
	radix_tree_load_root(root, &child, &maxindex);
	if (index > maxindex)
		return NULL;
	if (!child)
		return NULL;

	if (!radix_tree_is_internal_node(child)) {
		/* Single-slot tree */
		iter->index = index;
		iter->next_index = maxindex + 1;
		iter->tags = 1;
		iter->node = NULL;
		return (void __rcu **)&root->xa_head;
	}

	do {
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);

		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!tag_get(node, tag, offset) : !child) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			/* Scan forward within this node for the next candidate */
			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(node, tag,
						offset + 1);
			else
				while (++offset < RADIX_TREE_MAP_SIZE) {
					void *slot = rcu_dereference_raw(
							node->slots[offset]);
					if (slot)
						break;
				}
			index &= ~node_maxindex(node);
			index += offset << node->shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			/* No candidate left in this node: re-walk from the root */
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if (!child)
			goto restart;
		if (child == RADIX_TREE_RETRY)
			break;
	} while (node->shift && radix_tree_is_internal_node(child));

	/* Update the iterator state */
	iter->index = (index &~ node_maxindex(node)) | offset;
	iter->next_index = (index | node_maxindex(node)) + 1;
	iter->node = node;

	if (flags & RADIX_TREE_ITER_TAGGED)
		set_iter_tags(iter, node, offset, tag);

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);

/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	them at *@results and returns the number of items which were placed at
 *	*@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock. In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the
 *	semantics of an RCU protected gang lookup are as though multiple
 *	radix_tree_lookups have been issued in individual locks, and results
 *	stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		/* Internal entry: a concurrent modification; retry this slot */
		if (radix_tree_is_internal_node(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *	                             based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an
index-ascending scan of the tree for present items which 13201da177e4SLinus Torvalds * have the tag indexed by @tag set. Places the items at *@results and 13211da177e4SLinus Torvalds * returns the number of items which were placed at *@results. 13221da177e4SLinus Torvalds */ 13231da177e4SLinus Torvalds unsigned int 132435534c86SMatthew Wilcox radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, 1325daff89f3SJonathan Corbet unsigned long first_index, unsigned int max_items, 1326daff89f3SJonathan Corbet unsigned int tag) 13271da177e4SLinus Torvalds { 1328cebbd29eSKonstantin Khlebnikov struct radix_tree_iter iter; 1329d7b62727SMatthew Wilcox void __rcu **slot; 1330cebbd29eSKonstantin Khlebnikov unsigned int ret = 0; 13311da177e4SLinus Torvalds 1332cebbd29eSKonstantin Khlebnikov if (unlikely(!max_items)) 1333612d6c19SNick Piggin return 0; 1334612d6c19SNick Piggin 1335cebbd29eSKonstantin Khlebnikov radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 133646437f9aSMatthew Wilcox results[ret] = rcu_dereference_raw(*slot); 1337cebbd29eSKonstantin Khlebnikov if (!results[ret]) 133847feff2cSNick Piggin continue; 1339b194d16cSMatthew Wilcox if (radix_tree_is_internal_node(results[ret])) { 134046437f9aSMatthew Wilcox slot = radix_tree_iter_retry(&iter); 134146437f9aSMatthew Wilcox continue; 134246437f9aSMatthew Wilcox } 1343cebbd29eSKonstantin Khlebnikov if (++ret == max_items) 13441da177e4SLinus Torvalds break; 13451da177e4SLinus Torvalds } 13467cf9c2c7SNick Piggin 13471da177e4SLinus Torvalds return ret; 13481da177e4SLinus Torvalds } 13491da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 13501da177e4SLinus Torvalds 13511da177e4SLinus Torvalds /** 135247feff2cSNick Piggin * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 135347feff2cSNick Piggin * radix tree based on a tag 135447feff2cSNick Piggin * @root: radix tree root 135547feff2cSNick Piggin * @results: where the results of the lookup are placed 
135647feff2cSNick Piggin * @first_index: start the lookup from this key 135747feff2cSNick Piggin * @max_items: place up to this many items at *results 135847feff2cSNick Piggin * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 135947feff2cSNick Piggin * 136047feff2cSNick Piggin * Performs an index-ascending scan of the tree for present items which 136147feff2cSNick Piggin * have the tag indexed by @tag set. Places the slots at *@results and 136247feff2cSNick Piggin * returns the number of slots which were placed at *@results. 136347feff2cSNick Piggin */ 136447feff2cSNick Piggin unsigned int 136535534c86SMatthew Wilcox radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, 1366d7b62727SMatthew Wilcox void __rcu ***results, unsigned long first_index, 136735534c86SMatthew Wilcox unsigned int max_items, unsigned int tag) 136847feff2cSNick Piggin { 1369cebbd29eSKonstantin Khlebnikov struct radix_tree_iter iter; 1370d7b62727SMatthew Wilcox void __rcu **slot; 1371cebbd29eSKonstantin Khlebnikov unsigned int ret = 0; 137247feff2cSNick Piggin 1373cebbd29eSKonstantin Khlebnikov if (unlikely(!max_items)) 137447feff2cSNick Piggin return 0; 137547feff2cSNick Piggin 1376cebbd29eSKonstantin Khlebnikov radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1377cebbd29eSKonstantin Khlebnikov results[ret] = slot; 1378cebbd29eSKonstantin Khlebnikov if (++ret == max_items) 137947feff2cSNick Piggin break; 138047feff2cSNick Piggin } 138147feff2cSNick Piggin 138247feff2cSNick Piggin return ret; 138347feff2cSNick Piggin } 138447feff2cSNick Piggin EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 138547feff2cSNick Piggin 13860ac398efSMatthew Wilcox static bool __radix_tree_delete(struct radix_tree_root *root, 1387d7b62727SMatthew Wilcox struct radix_tree_node *node, void __rcu **slot) 13880ac398efSMatthew Wilcox { 13890a835c4fSMatthew Wilcox void *old = rcu_dereference_raw(*slot); 139001959dfeSMatthew Wilcox int values = xa_is_value(old) ? 
-1 : 0; 13910ac398efSMatthew Wilcox unsigned offset = get_slot_offset(node, slot); 13920ac398efSMatthew Wilcox int tag; 13930ac398efSMatthew Wilcox 13940a835c4fSMatthew Wilcox if (is_idr(root)) 13950a835c4fSMatthew Wilcox node_tag_set(root, node, IDR_FREE, offset); 13960a835c4fSMatthew Wilcox else 13970ac398efSMatthew Wilcox for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 13980ac398efSMatthew Wilcox node_tag_clear(root, node, tag, offset); 13990ac398efSMatthew Wilcox 140001959dfeSMatthew Wilcox replace_slot(slot, NULL, node, -1, values); 14011cf56f9dSMatthew Wilcox return node && delete_node(root, node); 14020ac398efSMatthew Wilcox } 14030ac398efSMatthew Wilcox 14040ac398efSMatthew Wilcox /** 14050ac398efSMatthew Wilcox * radix_tree_iter_delete - delete the entry at this iterator position 14060ac398efSMatthew Wilcox * @root: radix tree root 14070ac398efSMatthew Wilcox * @iter: iterator state 14080ac398efSMatthew Wilcox * @slot: pointer to slot 14090ac398efSMatthew Wilcox * 14100ac398efSMatthew Wilcox * Delete the entry at the position currently pointed to by the iterator. 14110ac398efSMatthew Wilcox * This may result in the current node being freed; if it is, the iterator 14120ac398efSMatthew Wilcox * is advanced so that it will not reference the freed memory. This 14130ac398efSMatthew Wilcox * function may be called without any locking if there are no other threads 14140ac398efSMatthew Wilcox * which can access this tree. 
14150ac398efSMatthew Wilcox */ 14160ac398efSMatthew Wilcox void radix_tree_iter_delete(struct radix_tree_root *root, 1417d7b62727SMatthew Wilcox struct radix_tree_iter *iter, void __rcu **slot) 14180ac398efSMatthew Wilcox { 14190ac398efSMatthew Wilcox if (__radix_tree_delete(root, iter->node, slot)) 14200ac398efSMatthew Wilcox iter->index = iter->next_index; 14210ac398efSMatthew Wilcox } 1422d1b48c1eSChris Wilson EXPORT_SYMBOL(radix_tree_iter_delete); 14230ac398efSMatthew Wilcox 1424139e5616SJohannes Weiner /** 142553c59f26SJohannes Weiner * radix_tree_delete_item - delete an item from a radix tree 14261da177e4SLinus Torvalds * @root: radix tree root 14271da177e4SLinus Torvalds * @index: index key 142853c59f26SJohannes Weiner * @item: expected item 14291da177e4SLinus Torvalds * 143053c59f26SJohannes Weiner * Remove @item at @index from the radix tree rooted at @root. 14311da177e4SLinus Torvalds * 14320ac398efSMatthew Wilcox * Return: the deleted entry, or %NULL if it was not present 143353c59f26SJohannes Weiner * or the entry at the given @index was not @item. 
14341da177e4SLinus Torvalds */ 143553c59f26SJohannes Weiner void *radix_tree_delete_item(struct radix_tree_root *root, 143653c59f26SJohannes Weiner unsigned long index, void *item) 14371da177e4SLinus Torvalds { 14380a835c4fSMatthew Wilcox struct radix_tree_node *node = NULL; 14397a4deea1SMatthew Wilcox void __rcu **slot = NULL; 1440139e5616SJohannes Weiner void *entry; 14411da177e4SLinus Torvalds 1442139e5616SJohannes Weiner entry = __radix_tree_lookup(root, index, &node, &slot); 14437a4deea1SMatthew Wilcox if (!slot) 14447a4deea1SMatthew Wilcox return NULL; 14450a835c4fSMatthew Wilcox if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, 14460a835c4fSMatthew Wilcox get_slot_offset(node, slot)))) 1447139e5616SJohannes Weiner return NULL; 14481da177e4SLinus Torvalds 1449139e5616SJohannes Weiner if (item && entry != item) 1450139e5616SJohannes Weiner return NULL; 1451139e5616SJohannes Weiner 14520ac398efSMatthew Wilcox __radix_tree_delete(root, node, slot); 1453201b6264SChristoph Lameter 1454139e5616SJohannes Weiner return entry; 14551da177e4SLinus Torvalds } 145653c59f26SJohannes Weiner EXPORT_SYMBOL(radix_tree_delete_item); 145753c59f26SJohannes Weiner 145853c59f26SJohannes Weiner /** 14590ac398efSMatthew Wilcox * radix_tree_delete - delete an entry from a radix tree 146053c59f26SJohannes Weiner * @root: radix tree root 146153c59f26SJohannes Weiner * @index: index key 146253c59f26SJohannes Weiner * 14630ac398efSMatthew Wilcox * Remove the entry at @index from the radix tree rooted at @root. 146453c59f26SJohannes Weiner * 14650ac398efSMatthew Wilcox * Return: The deleted entry, or %NULL if it was not present. 
146653c59f26SJohannes Weiner */ 146753c59f26SJohannes Weiner void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 146853c59f26SJohannes Weiner { 146953c59f26SJohannes Weiner return radix_tree_delete_item(root, index, NULL); 147053c59f26SJohannes Weiner } 14711da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_delete); 14721da177e4SLinus Torvalds 14731da177e4SLinus Torvalds /** 14741da177e4SLinus Torvalds * radix_tree_tagged - test whether any items in the tree are tagged 14751da177e4SLinus Torvalds * @root: radix tree root 14761da177e4SLinus Torvalds * @tag: tag to test 14771da177e4SLinus Torvalds */ 147835534c86SMatthew Wilcox int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) 14791da177e4SLinus Torvalds { 1480612d6c19SNick Piggin return root_tag_get(root, tag); 14811da177e4SLinus Torvalds } 14821da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_tagged); 14831da177e4SLinus Torvalds 14840a835c4fSMatthew Wilcox /** 14850a835c4fSMatthew Wilcox * idr_preload - preload for idr_alloc() 14860a835c4fSMatthew Wilcox * @gfp_mask: allocation mask to use for preloading 14870a835c4fSMatthew Wilcox * 14880a835c4fSMatthew Wilcox * Preallocate memory to use for the next call to idr_alloc(). This function 14890a835c4fSMatthew Wilcox * returns with preemption disabled. It will be enabled by idr_preload_end(). 
 */
void idr_preload(gfp_t gfp_mask)
{
	/*
	 * On success __radix_tree_preload() already returns with
	 * preemption disabled; on failure it returns with preemption
	 * enabled, so disable it here to honour this function's
	 * "returns with preemption disabled" contract either way.
	 */
	if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
		preempt_disable();
}
EXPORT_SYMBOL(idr_preload);

/*
 * Find the lowest free (IDR_FREE-tagged) slot at index >= iter->next_index,
 * growing the tree as required, and return a pointer to that slot for the
 * caller to store into.  Returns ERR_PTR(-ENOSPC) if no free index <= @max
 * exists, or ERR_PTR(-ENOMEM) if a needed node cannot be allocated.
 */
void __rcu **idr_get_free(struct radix_tree_root *root,
			      struct radix_tree_iter *iter, gfp_t gfp,
			      unsigned long max)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex, start = iter->next_index;
	unsigned int shift, offset = 0;

 grow:
	shift = radix_tree_load_root(root, &child, &maxindex);
	/* If no free slot exists in the current tree, search past it. */
	if (!radix_tree_tagged(root, IDR_FREE))
		start = max(start, maxindex + 1);
	if (start > max)
		return ERR_PTR(-ENOSPC);

	if (start > maxindex) {
		/* Grow the tree until it can index @start. */
		int error = radix_tree_extend(root, gfp, start, shift);
		if (error < 0)
			return ERR_PTR(error);
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}
	/* An empty tree needs at least one level to hold index 0. */
	if (start == 0 && shift == 0)
		shift = RADIX_TREE_MAP_SHIFT;

	/* Descend towards the leaf, creating interior nodes as needed. */
	while (shift) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node.  */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return ERR_PTR(-ENOMEM);
			/* A brand-new node is entirely free. */
			all_tag_set(child, IDR_FREE);
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, start);
		if (!tag_get(node, IDR_FREE, offset)) {
			/* This subtree is full: scan right for a free slot. */
			offset = radix_tree_find_next_bit(node, IDR_FREE,
							offset + 1);
			start = next_index(start, node, offset);
			if (start > max)
				return ERR_PTR(-ENOSPC);
			/* Node exhausted: walk back up to a parent with room. */
			while (offset == RADIX_TREE_MAP_SIZE) {
				offset = node->offset + 1;
				node = node->parent;
				if (!node)
					goto grow;
				shift = node->shift;
			}
			child = rcu_dereference_raw(node->slots[offset]);
		}
		slot = &node->slots[offset];
	}

	/* Publish the found position through the iterator. */
	iter->index = start;
	if (node)
		iter->next_index = 1 + min(max, (start | node_maxindex(node)));
	else
		iter->next_index = 1;
	iter->node = node;
	set_iter_tags(iter, node, offset, IDR_FREE);

	return slot;
}

/**
 * idr_destroy - release all internal memory from an IDR
 * @idr: idr handle
 *
 * After this function is called, the IDR is empty, and may be reused or
 * the data structure containing it may be freed.
 *
 * A typical clean-up sequence for objects stored in an idr tree will use
 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
 * free the memory used to keep track of those objects.
 */
void idr_destroy(struct idr *idr)
{
	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
	/* Only an interior node owns further nodes that must be freed. */
	if (radix_tree_is_internal_node(node))
		radix_tree_free_nodes(node);
	idr->idr_rt.xa_head = NULL;
	/* An empty IDR has every index free again. */
	root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);

/*
 * Slab constructor: present each node to its first user zeroed and with
 * its private_list initialised.
 */
static void
radix_tree_node_ctor(void *arg)
{
	struct radix_tree_node *node = arg;

	memset(node, 0, sizeof(*node));
	INIT_LIST_HEAD(&node->private_list);
}

/* CPU hotplug callback: reclaim the dead CPU's preload cache. */
static int radix_tree_cpu_dead(unsigned int cpu)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;

	/* Free per-cpu pool of preloaded nodes */
	rtp = &per_cpu(radix_tree_preloads, cpu);
	while (rtp->nr) {
		node = rtp->nodes;
		/* The preload pool is chained through node->parent. */
		rtp->nodes = node->parent;
		kmem_cache_free(radix_tree_node_cachep, node);
		rtp->nr--;
	}
	return 0;
}

/* One-time boot initialisation: node cache plus CPU-hotplug hook. */
void __init radix_tree_init(void)
{
	int ret;

	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
	BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
					NULL, radix_tree_cpu_dead);
	WARN_ON(ret < 0);
}