11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Copyright (C) 2001 Momchil Velikov 31da177e4SLinus Torvalds * Portions Copyright (C) 2001 Christoph Hellwig 4cde53535SChristoph Lameter * Copyright (C) 2005 SGI, Christoph Lameter 57cf9c2c7SNick Piggin * Copyright (C) 2006 Nick Piggin 678c1d784SKonstantin Khlebnikov * Copyright (C) 2012 Konstantin Khlebnikov 76b053b8eSMatthew Wilcox * Copyright (C) 2016 Intel, Matthew Wilcox 86b053b8eSMatthew Wilcox * Copyright (C) 2016 Intel, Ross Zwisler 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 111da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License as 121da177e4SLinus Torvalds * published by the Free Software Foundation; either version 2, or (at 131da177e4SLinus Torvalds * your option) any later version. 141da177e4SLinus Torvalds * 151da177e4SLinus Torvalds * This program is distributed in the hope that it will be useful, but 161da177e4SLinus Torvalds * WITHOUT ANY WARRANTY; without even the implied warranty of 171da177e4SLinus Torvalds * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 181da177e4SLinus Torvalds * General Public License for more details. 191da177e4SLinus Torvalds * 201da177e4SLinus Torvalds * You should have received a copy of the GNU General Public License 211da177e4SLinus Torvalds * along with this program; if not, write to the Free Software 221da177e4SLinus Torvalds * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
#include <linux/preempt.h>		/* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/xarray.h>


/* Number of nodes in fully populated tree of given height */
static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;

/*
 * Radix tree node cache.
 */
struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)

/*
 * The IDR does not have to be as high as the radix tree since it uses
 * signed integers, not unsigned longs.
 */
#define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
#define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)

/*
 * The IDA is even shorter since it uses a bitmap at the last level.
 */
#define IDA_INDEX_BITS		(8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
#define IDA_MAX_PATH		(DIV_ROUND_UP(IDA_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	unsigned nr;		/* number of nodes currently in the pool */
	/* nodes->parent points to next preallocated node */
	struct radix_tree_node *nodes;
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

/* Strip the internal-entry bit to recover the node pointer. */
static inline struct radix_tree_node *entry_to_node(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
}

/* Set the internal-entry bit so the pointer is recognised as a node. */
static inline void *node_to_entry(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}

#define RADIX_TREE_RETRY	XA_RETRY_ENTRY

/*
 * Offset of @slot within @parent's slot array; 0 when there is no parent
 * (i.e. the slot is the root's xa_head).
 */
static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
{
	return parent ? slot - parent->slots : 0;
}

/*
 * Descend one level from @parent towards @index.  A sibling entry is
 * redirected to the canonical slot it points at.  Stores the entry found
 * in *@nodep and returns the (possibly redirected) slot offset.
 */
static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
			struct radix_tree_node **nodep, unsigned long index)
{
	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

	if (xa_is_sibling(entry)) {
		offset = xa_to_sibling(entry);
		entry = rcu_dereference_raw(parent->slots[offset]);
	}

	*nodep = (void *)entry;
	return offset;
}

/* The GFP flags for this tree are stored in the low bits of xa_flags. */
static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
	return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

/* Root tags live in the high bits of xa_flags, above ROOT_TAG_SHIFT. */
static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
	root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
	root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
}

static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
	return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
}

static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
	return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
}

/* True if this root belongs to an IDR rather than a plain radix tree. */
static inline bool is_idr(const struct radix_tree_root *root)
{
	return !!(root->xa_flags & ROOT_IS_IDR);
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(const struct radix_tree_node *node,
							unsigned int tag)
{
	unsigned idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

/* Set @tag on every slot of @node. */
static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @node: The node whose tag bitmap is searched
 * @tag: The tag whose bitmap is searched
 * @offset: The bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or RADIX_TREE_MAP_SIZE if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
			 unsigned long offset)
{
	const unsigned long *addr = node->tags[tag];

	if (offset < RADIX_TREE_MAP_SIZE) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		/* Round up to the next word boundary and scan word-at-a-time */
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < RADIX_TREE_MAP_SIZE) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return RADIX_TREE_MAP_SIZE;
}

/* Slot offset within the iterator's current node. */
static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
	return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
}

/*
 * The maximum index which can be stored in a radix tree
 */
static inline unsigned long shift_maxindex(unsigned int shift)
{
	return (RADIX_TREE_MAP_SIZE << shift) - 1;
}

static inline unsigned long node_maxindex(const struct radix_tree_node *node)
{
	return shift_maxindex(node->shift);
}

/* First index covered by the slot after @offset in @node, at @index's level. */
static unsigned long next_index(unsigned long index,
				const struct radix_tree_node *node,
				unsigned long offset)
{
	return (index & ~node_maxindex(node)) + (offset << node->shift);
}

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
			struct radix_tree_root *root,
			unsigned int shift, unsigned int offset,
			unsigned int count, unsigned int nr_values)
{
	struct radix_tree_node *ret = NULL;

	/*
	 * Preload code isn't irq safe and it doesn't make sense to use
	 * preloading during an interrupt anyway as all the allocations have
	 * to be atomic. So just do normal allocation when in interrupt.
	 */
	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
		struct radix_tree_preload *rtp;

		/*
		 * Even if the caller has preloaded, try to allocate from the
		 * cache first for the new node to get accounted to the memory
		 * cgroup.
		 */
		ret = kmem_cache_alloc(radix_tree_node_cachep,
				       gfp_mask | __GFP_NOWARN);
		if (ret)
			goto out;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr) {
			/* Pop the head of the per-cpu preload list */
			ret = rtp->nodes;
			rtp->nodes = ret->parent;
			rtp->nr--;
		}
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(ret);
		goto out;
	}
	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
	BUG_ON(radix_tree_is_internal_node(ret));
	if (ret) {
		ret->shift = shift;
		ret->offset = offset;
		ret->count = count;
		ret->nr_values = nr_values;
		ret->parent = parent;
		ret->array = root;
	}
	return ret;
}

void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);

	/*
	 * Must only free zeroed nodes into the slab. We can be left with
	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
	 * and tags here.
	 */
	memset(node->slots, 0, sizeof(node->slots));
	memset(node->tags, 0, sizeof(node->tags));
	INIT_LIST_HEAD(&node->private_list);

	kmem_cache_free(radix_tree_node_cachep, node);
}

/* Free @node once all current RCU readers have finished. */
static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail. On
 * success, return zero, with preemption disabled. On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	/*
	 * Nodes preloaded by one cgroup can be be used by another cgroup, so
	 * they should never be accounted to any particular memory cgroup.
	 */
	gfp_mask &= ~__GFP_ACCOUNT;

	preempt_disable();
	rtp = this_cpu_ptr(&radix_tree_preloads);
	while (rtp->nr < nr) {
		/* Allocation may sleep, so drop preemption protection and
		 * re-fetch the per-cpu pointer afterwards - we may have
		 * migrated to another CPU in the meantime. */
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr < nr) {
			node->parent = rtp->nodes;
			rtp->nodes = node;
			rtp->nr++;
		} else {
			kmem_cache_free(radix_tree_node_cachep, node);
		}
	}
	ret = 0;
out:
	return ret;
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail. On
 * success, return zero, with preemption disabled. On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	/* Warn on non-sensical use... */
	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);

/*
 * The same as above function, except we don't guarantee preloading happens.
 * We do it, if we decide it helps. On success, return zero with preemption
 * disabled. On error, return -ENOMEM with preemption not disabled.
 */
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
	if (gfpflags_allow_blocking(gfp_mask))
		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
	/* Preloading doesn't help anything with this gfp mask, skip it */
	preempt_disable();
	return 0;
}
EXPORT_SYMBOL(radix_tree_maybe_preload);

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/*
 * Preload with enough objects to ensure that we can split a single entry
 * of order @old_order into many entries of size @new_order
 */
int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
				gfp_t gfp_mask)
{
	unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
	unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
				(new_order / RADIX_TREE_MAP_SHIFT);
	unsigned nr = 0;

	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	BUG_ON(new_order >= old_order);

	while (layers--)
		nr = nr * RADIX_TREE_MAP_SIZE + 1;
	return __radix_tree_preload(gfp_mask, top * nr);
}
#endif

/*
 * The same as function above, but preload number of nodes required to insert
 * (1 << order) continuous naturally-aligned elements.
 */
int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
{
	unsigned long nr_subtrees;
	int nr_nodes, subtree_height;

	/* Preloading doesn't help anything with this gfp mask, skip it */
	if (!gfpflags_allow_blocking(gfp_mask)) {
		preempt_disable();
		return 0;
	}

	/*
	 * Calculate number and height of fully populated subtrees it takes to
	 * store (1 << order) elements.
	 */
	nr_subtrees = 1 << order;
	for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
			subtree_height++)
		nr_subtrees >>= RADIX_TREE_MAP_SHIFT;

	/*
	 * The worst case is zero height tree with a single item at index 0 and
	 * then inserting items starting at ULONG_MAX - (1 << order).
	 *
	 * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
	 * 0-index item.
	 */
	nr_nodes = RADIX_TREE_MAX_PATH;

	/* Plus branch to fully populated subtrees. */
	nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;

	/* Root node is shared. */
	nr_nodes--;

	/* Plus nodes required to build subtrees. */
	nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];

	return __radix_tree_preload(gfp_mask, nr_nodes);
}

/*
 * Read the root's head entry.  Stores it in *@nodep and the maximum index
 * the current tree can hold in *@maxindex.  Returns the shift one level
 * above the top node (0 for an empty tree or a direct data entry at root).
 */
static unsigned radix_tree_load_root(const struct radix_tree_root *root,
		struct radix_tree_node **nodep, unsigned long *maxindex)
{
	struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);

	*nodep = node;

	if (likely(radix_tree_is_internal_node(node))) {
		node = entry_to_node(node);
		*maxindex = node_maxindex(node);
		return node->shift + RADIX_TREE_MAP_SHIFT;
	}

	*maxindex = 0;
	return 0;
}

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
				unsigned long index, unsigned int shift)
{
	void *entry;
	unsigned int maxshift;
	int tag;

	/* Figure out what the shift should be. */
	maxshift = shift;
	while (index > shift_maxindex(maxshift))
		maxshift += RADIX_TREE_MAP_SHIFT;

	entry = rcu_dereference_raw(root->xa_head);
	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
		goto out;

	do {
		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
							root, shift, 0, 1, 0);
		if (!node)
			return -ENOMEM;

		if (is_idr(root)) {
			all_tag_set(node, IDR_FREE);
			if (!root_tag_get(root, IDR_FREE)) {
				tag_clear(node, IDR_FREE, 0);
				root_tag_set(root, IDR_FREE);
			}
		} else {
			/* Propagate the aggregated tag info to the new child */
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
				if (root_tag_get(root, tag))
					tag_set(node, tag, 0);
			}
		}

		BUG_ON(shift > BITS_PER_LONG);
		if (radix_tree_is_internal_node(entry)) {
			entry_to_node(entry)->parent = node;
		} else if (xa_is_value(entry)) {
			/* Moving a value entry root->xa_head to a node */
			node->nr_values = 1;
		}
		/*
		 * entry was already in the radix tree, so we do not need
		 * rcu_assign_pointer here
		 */
		node->slots[0] = (void __rcu *)entry;
		entry = node_to_entry(node);
		rcu_assign_pointer(root->xa_head, entry);
		shift += RADIX_TREE_MAP_SHIFT;
	} while (shift <= maxshift);
out:
	return maxshift + RADIX_TREE_MAP_SHIFT;
}

/**
 * radix_tree_shrink - shrink radix tree to minimum height
 * @root:	radix tree root
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
	bool shrunk = false;

	for (;;) {
		struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
		struct radix_tree_node *child;

		if (!radix_tree_is_internal_node(node))
			break;
		node = entry_to_node(node);

		/*
		 * The candidate node has more than one child, or its child
		 * is not at the leftmost slot, or the child is a multiorder
		 * entry, we cannot shrink.
		 */
		if (node->count != 1)
			break;
		child = rcu_dereference_raw(node->slots[0]);
		if (!child)
			break;
		if (!radix_tree_is_internal_node(child) && node->shift)
			break;

		/*
		 * For an IDR, we must not shrink entry 0 into the root in
		 * case somebody calls idr_replace() with a pointer that
		 * appears to be an internal entry
		 */
		if (!node->shift && is_idr(root))
			break;

		if (radix_tree_is_internal_node(child))
			entry_to_node(child)->parent = NULL;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (node->slots[0]), it will be safe to dereference the new
		 * one (root->xa_head) as far as dependent read barriers go.
		 */
		root->xa_head = (void __rcu *)child;
		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
			root_tag_clear(root, IDR_FREE);

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		node->count = 0;
		if (!radix_tree_is_internal_node(child)) {
			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		shrunk = true;
	}

	return shrunk;
}

static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			if (node_to_entry(node) ==
					rcu_dereference_raw(root->xa_head))
				deleted |= radix_tree_shrink(root);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			/*
			 * Shouldn't the tags already have all been cleared
			 * by the caller?
6660a835c4fSMatthew Wilcox */ 6670a835c4fSMatthew Wilcox if (!is_idr(root)) 668f4b109c6SJohannes Weiner root_tag_clear_all(root); 669f8d5d0ccSMatthew Wilcox root->xa_head = NULL; 670f4b109c6SJohannes Weiner } 671f4b109c6SJohannes Weiner 672ea07b862SJohannes Weiner WARN_ON_ONCE(!list_empty(&node->private_list)); 673f4b109c6SJohannes Weiner radix_tree_node_free(node); 6740ac398efSMatthew Wilcox deleted = true; 675f4b109c6SJohannes Weiner 676f4b109c6SJohannes Weiner node = parent; 677f4b109c6SJohannes Weiner } while (node); 6780ac398efSMatthew Wilcox 6790ac398efSMatthew Wilcox return deleted; 680f4b109c6SJohannes Weiner } 681f4b109c6SJohannes Weiner 682f4b109c6SJohannes Weiner /** 683139e5616SJohannes Weiner * __radix_tree_create - create a slot in a radix tree 6841da177e4SLinus Torvalds * @root: radix tree root 6851da177e4SLinus Torvalds * @index: index key 686e6145236SMatthew Wilcox * @order: index occupies 2^order aligned slots 687139e5616SJohannes Weiner * @nodep: returns node 688139e5616SJohannes Weiner * @slotp: returns slot 6891da177e4SLinus Torvalds * 690139e5616SJohannes Weiner * Create, if necessary, and return the node and slot for an item 691139e5616SJohannes Weiner * at position @index in the radix tree @root. 692139e5616SJohannes Weiner * 693139e5616SJohannes Weiner * Until there is more than one item in the tree, no nodes are 694f8d5d0ccSMatthew Wilcox * allocated and @root->xa_head is used as a direct slot instead of 695139e5616SJohannes Weiner * pointing to a node, in which case *@nodep will be NULL. 696139e5616SJohannes Weiner * 697139e5616SJohannes Weiner * Returns -ENOMEM, or 0 for success. 
 */
static int __radix_tree_create(struct radix_tree_root *root,
		unsigned long index, unsigned order,
		struct radix_tree_node **nodep, void __rcu ***slotp)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex;
	unsigned int shift, offset = 0;
	/* Highest index covered by a 2^order-aligned entry at @index */
	unsigned long max = index | ((1UL << order) - 1);
	gfp_t gfp = root_gfp_mask(root);

	shift = radix_tree_load_root(root, &child, &maxindex);

	/* Make sure the tree is high enough. */
	if (order > 0 && max == ((1UL << order) - 1))
		max++;
	if (max > maxindex) {
		int error = radix_tree_extend(root, gfp, max, shift);
		if (error < 0)
			return error;
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}

	/* Descend, allocating interior nodes as needed, until @order depth */
	while (shift > order) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node.  */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return -ENOMEM;
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		/* Go a level down */
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);
		slot = &node->slots[offset];
	}

	if (nodep)
		*nodep = node;
	if (slotp)
		*slotp = slot;
	return 0;
}

/*
 * Free any nodes below this node.  The tree is presumed to not need
 * shrinking, and any user data in the tree is presumed to not need a
 * destructor called on it.  If we need to add a destructor, we can
 * add that functionality later.  Note that we may not clear tags or
 * slots from the tree as an RCU walker may still have a pointer into
 * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
 * but we'll still have to clear those in rcu_free.
 */
static void radix_tree_free_nodes(struct radix_tree_node *node)
{
	unsigned offset = 0;
	struct radix_tree_node *child = entry_to_node(node);

	/* Iterative post-order walk: descend first, free on the way back up */
	for (;;) {
		void *entry = rcu_dereference_raw(child->slots[offset]);
		if (xa_is_node(entry) && child->shift) {
			child = entry_to_node(entry);
			offset = 0;
			continue;
		}
		offset++;
		while (offset == RADIX_TREE_MAP_SIZE) {
			struct radix_tree_node *old = child;
			offset = child->offset + 1;
			child = child->parent;
			WARN_ON_ONCE(!list_empty(&old->private_list));
			radix_tree_node_free(old);
			if (old == entry_to_node(node))
				return;
		}
	}
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/*
 * Store @item in @slot and, for a multiorder entry (@order larger than
 * @node->shift), fill the rest of the 2^(order-shift) aligned range with
 * sibling entries pointing back at it.  Tags from replaced entries are
 * moved onto the canonical slot.  Returns the number of slots written,
 * or -EEXIST if @replace is false and any slot is occupied.
 */
static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, unsigned order, bool replace)
{
	void *sibling;
	unsigned i, n, tag, offset, tags = 0;

	if (node) {
		if (order > node->shift)
			n = 1 << (order - node->shift);
		else
			n = 1;
		offset = get_slot_offset(node, slot);
	} else {
		n = 1;
		offset = 0;
	}

	if (n > 1) {
		/* Align down to the start of the 2^n slot range */
		offset = offset & ~(n - 1);
		slot = &node->slots[offset];
	}
	sibling = xa_mk_sibling(offset);

	for (i = 0; i < n; i++) {
		if (slot[i]) {
			if (replace) {
				node->count--;
				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
					if (tag_get(node, tag, offset + i))
						tags |= 1 << tag;
			} else
				return -EEXIST;
		}
	}

	for (i = 0; i < n; i++) {
		struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
		if (i) {
			rcu_assign_pointer(slot[i], sibling);
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
				if (tags & (1 << tag))
					tag_clear(node, tag, offset + i);
		} else {
			rcu_assign_pointer(slot[i], item);
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
				if (tags & (1 << tag))
					tag_set(node, tag, offset);
		}
		if (xa_is_node(old))
			radix_tree_free_nodes(old);
		if (xa_is_value(old))
			node->nr_values--;
	}
	if (node) {
		node->count += n;
		if (xa_is_value(item))
			node->nr_values += n;
	}
	return n;
}
#else
/*
 * Single-order variant: store @item in @slot and update the accounting.
 * Returns 1, or -EEXIST if the slot is occupied (replace is irrelevant
 * without multiorder support).
 */
static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, unsigned order, bool replace)
{
	if (*slot)
		return -EEXIST;
	rcu_assign_pointer(*slot, item);
	if (node) {
		node->count++;
		if (xa_is_value(item))
			node->nr_values++;
	}
	return 1;
}
#endif

/**
 *	__radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@order:		key covers the 2^order indices around index
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 *
 *	Returns 0 on success, -EEXIST if an entry already occupies the
 *	range, or -ENOMEM if node allocation failed.
 */
int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
			unsigned order, void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, order, &node, &slot);
	if (error)
		return error;

	error = insert_entries(node, slot, item, order, false);
	if (error < 0)
		return error;

	/* A freshly created slot must not have any tags set yet */
	if (node) {
		unsigned offset = get_slot_offset(node, slot);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
		BUG_ON(tag_get(node, 2, offset));
	} else {
		BUG_ON(root_tags_get(root));
	}

	return 0;
}
EXPORT_SYMBOL(__radix_tree_insert);

/**
 *	__radix_tree_lookup	-	lookup an item in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@nodep:		returns node
 *	@slotp:		returns slot
 *
 *	Lookup and return the item at position @index in the radix
 *	tree @root.
 *
 *	Until there is more than one item in the tree, no nodes are
 *	allocated and @root->xa_head is used as a direct slot instead of
 *	pointing to a node, in which case *@nodep will be NULL.
 */
void *__radix_tree_lookup(const struct radix_tree_root *root,
			  unsigned long index, struct radix_tree_node **nodep,
			  void __rcu ***slotp)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	void __rcu **slot;

 restart:
	parent = NULL;
	slot = (void __rcu **)&root->xa_head;
	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		/* Concurrent shrink published a RETRY entry: start over */
		if (node == RADIX_TREE_RETRY)
			goto restart;
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		slot = parent->slots + offset;
		if (parent->shift == 0)
			break;
	}

	if (nodep)
		*nodep = parent;
	if (slotp)
		*slotp = slot;
	return node;
}

/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers. Any dereference of the slot must be done
 *	using radix_tree_deref_slot.
 */
void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
				unsigned long index)
{
	void __rcu **slot;

	if (!__radix_tree_lookup(root, index, NULL, &slot))
		return NULL;
	return slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{
	return __radix_tree_lookup(root, index, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_lookup);

/*
 * Fix up the sibling entries that follow @slot after its contents have
 * changed.  A negative @count means the canonical entry is being deleted,
 * so clear the sibling slots too; @values adjusts the node's tally of
 * value entries for each sibling.  No-op without multiorder support.
 */
static inline void replace_sibling_entries(struct radix_tree_node *node,
				void __rcu **slot, int count, int values)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDER
	unsigned offset = get_slot_offset(node, slot);
	void *ptr = xa_mk_sibling(offset);

	while (++offset < RADIX_TREE_MAP_SIZE) {
		if (rcu_dereference_raw(node->slots[offset]) != ptr)
			break;
		if (count < 0) {
			node->slots[offset] = NULL;
			node->count--;
		}
		node->nr_values += values;
	}
#endif
}

/*
 * Store @item in @slot, first updating @node's occupancy (@count) and
 * value-entry (@values) accounting so readers never see a published
 * entry with stale counts.
 */
static void replace_slot(void __rcu **slot, void *item,
		struct radix_tree_node *node, int count, int values)
{
	if (node && (count || values)) {
		node->count += count;
		node->nr_values += values;
		replace_sibling_entries(node, slot, count, values);
	}

	rcu_assign_pointer(*slot, item);
}

/*
 * Return the state of @tag for @offset in @node, falling back to the
 * root tag when @node is NULL (single-entry tree).
 */
static bool node_tag_get(const struct radix_tree_root *root,
				const struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	if (node)
		return tag_get(node, tag, offset);
	return root_tag_get(root, tag);
}

/*
 * IDR users want to be able to store NULL in the tree, so if the slot isn't
 * free, don't adjust the count, even if it's transitioning between NULL and
 * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
 * have empty bits, but it only stores NULL in slots when they're being
 * deleted.
 */
static int calculate_count(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot,
				void *item, void *old)
{
	if (is_idr(root)) {
		unsigned offset = get_slot_offset(node, slot);
		bool free = node_tag_get(root, node, IDR_FREE, offset);
		if (!free)
			return 0;
		if (!old)
			return 1;
	}
	return !!item - !!old;
}

/**
 * __radix_tree_replace		- replace item in a slot
 * @root:		radix tree root
 * @node:		pointer to tree node
 * @slot:		pointer to slot in @node
 *
 * @item:		new item to store in the slot.
 *
 * For use with __radix_tree_lookup().  Caller must hold tree write locked
 * across slot lookup and replacement.
 */
void __radix_tree_replace(struct radix_tree_root *root,
			  struct radix_tree_node *node,
			  void __rcu **slot, void *item)
{
	void *old = rcu_dereference_raw(*slot);
	int values = !!xa_is_value(item) - !!xa_is_value(old);
	int count = calculate_count(root, node, slot, item, old);

	/*
	 * This function supports replacing value entries and
	 * deleting entries, but that needs accounting against the
	 * node unless the slot is root->xa_head.
	 */
	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
			(count || values));
	replace_slot(slot, item, node, count, values);

	if (!node)
		return;

	/* A deletion may have emptied @node; free it and any empty parents */
	delete_node(root, node);
}

/**
 * radix_tree_replace_slot	- replace item in a slot
 * @root:	radix tree root
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_lookup_slot() and
 * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
 * across slot lookup and replacement.
 *
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and value entries, as that requires accounting
 * inside the radix tree node. When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
 * radix_tree_iter_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void __rcu **slot, void *item)
{
	__radix_tree_replace(root, NULL, slot, item);
}
EXPORT_SYMBOL(radix_tree_replace_slot);

/**
 * radix_tree_iter_replace - replace item in a slot
 * @root:	radix tree root
 * @iter:	iterator state describing the slot
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_split() and radix_tree_for_each_slot().
 * Caller must hold tree write locked across split and replacement.
 */
void radix_tree_iter_replace(struct radix_tree_root *root,
				const struct radix_tree_iter *iter,
				void __rcu **slot, void *item)
{
	__radix_tree_replace(root, iter->node, slot, item);
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/**
 * radix_tree_join - replace multiple entries with one multiorder entry
 * @root: radix tree root
 * @index: an index inside the new entry
 * @order: order of the new entry
 * @item: new entry
 *
 * Call this function to replace several entries with one larger entry.
 * The existing entries are presumed to not need freeing as a result of
 * this call.
 *
 * The replacement entry will have all the tags set on it that were set
 * on any of the entries it is replacing.
 *
 * Returns 0 on success, or -ENOMEM if node allocation failed.
 */
int radix_tree_join(struct radix_tree_root *root, unsigned long index,
			unsigned order, void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, order, &node, &slot);
	if (!error)
		error = insert_entries(node, slot, item, order, true);
	/* insert_entries() returns the number of slots written on success */
	if (error > 0)
		error = 0;

	return error;
}

/**
 * radix_tree_split - Split an entry into smaller entries
 * @root: radix tree root
 * @index: An index within the large entry
 * @order: Order of new entries
 *
 * Call this function as the first step in replacing a multiorder entry
 * with several entries of lower order.  After this function returns,
 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
 * and call radix_tree_iter_replace() to set up each new entry.
 *
 * The tags from this entry are replicated to all the new entries.
 *
 * The radix tree should be locked against modification during the entire
 * replacement operation.
 * Lock-free lookups will see RADIX_TREE_RETRY which
 * should prompt RCU walkers to restart the lookup from the root.
 */
int radix_tree_split(struct radix_tree_root *root, unsigned long index,
				unsigned order)
{
	struct radix_tree_node *parent, *node, *child;
	void __rcu **slot;
	unsigned int offset, end;
	unsigned n, tag, tags = 0;
	gfp_t gfp = root_gfp_mask(root);

	if (!__radix_tree_lookup(root, index, &parent, &slot))
		return -ENOENT;
	if (!parent)
		return -ENOENT;

	offset = get_slot_offset(parent, slot);

	/* Remember the tags on the entry being split */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
		if (tag_get(parent, tag, offset))
			tags |= 1 << tag;

	/* Replace each sibling entry with RETRY, copying the tags first */
	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
		if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end])))
			break;
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			if (tags & (1 << tag))
				tag_set(parent, tag, end);
		/* rcu_assign_pointer ensures tags are set before RETRY */
		rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
	}
	rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
	parent->nr_values -= (end - offset);

	if (order == parent->shift)
		return 0;
	if (order > parent->shift) {
		/* New entries are still multiorder within this node */
		while (offset < end)
			offset += insert_entries(parent, &parent->slots[offset],
					RADIX_TREE_RETRY, order, true);
		return 0;
	}

	node = parent;

	for (;;) {
		if (node->shift > order) {
			/* Descend, building new child levels as we go */
			child = radix_tree_node_alloc(gfp, node, root,
					node->shift - RADIX_TREE_MAP_SHIFT,
					offset, 0, 0);
			if (!child)
				goto nomem;
			if (node != parent) {
				node->count++;
				rcu_assign_pointer(node->slots[offset],
							node_to_entry(child));
				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
					if (tags & (1 << tag))
						tag_set(node, tag, offset);
			}

			node = child;
			offset = 0;
			continue;
		}

		n = insert_entries(node, &node->slots[offset],
				RADIX_TREE_RETRY, order, false);
		BUG_ON(n > RADIX_TREE_MAP_SIZE);

		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			if (tags & (1 << tag))
				tag_set(node, tag, offset);
		offset += n;

		/*
		 * A fully populated level is published into its parent
		 * (replacing the RETRY entry) on the way back up.
		 */
		while (offset == RADIX_TREE_MAP_SIZE) {
			if (node == parent)
				break;
			offset = node->offset;
			child = node;
			node = node->parent;
			rcu_assign_pointer(node->slots[offset],
					node_to_entry(child));
			offset++;
		}
		if ((node == parent) && (offset == end))
			return 0;
	}

 nomem:
	/* Shouldn't happen; did user forget to preload? */
	/* TODO: free all the allocated nodes */
	WARN_ON(1);
	return -ENOMEM;
}
#endif

/*
 * Set @tag for @offset in @node and propagate it up through the
 * ancestors, stopping as soon as an ancestor already has the tag set
 * (its own ancestors must then have it too).  Finally make sure the
 * tag is reflected in the root.
 */
static void node_tag_set(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (tag_get(node, tag, offset))
			return;
		tag_set(node, tag, offset);
		offset = node->offset;
		node = node->parent;
	}

	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);
}

/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag
index 12901da177e4SLinus Torvalds * 1291daff89f3SJonathan Corbet * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 1292daff89f3SJonathan Corbet * corresponding to @index in the radix tree. From 12931da177e4SLinus Torvalds * the root all the way down to the leaf node. 12941da177e4SLinus Torvalds * 12951da177e4SLinus Torvalds * Returns the address of the tagged item. Setting a tag on a not-present 12961da177e4SLinus Torvalds * item is a bug. 12971da177e4SLinus Torvalds */ 12981da177e4SLinus Torvalds void *radix_tree_tag_set(struct radix_tree_root *root, 1299daff89f3SJonathan Corbet unsigned long index, unsigned int tag) 13001da177e4SLinus Torvalds { 1301fb969909SRoss Zwisler struct radix_tree_node *node, *parent; 1302fb969909SRoss Zwisler unsigned long maxindex; 13031da177e4SLinus Torvalds 13049e85d811SMatthew Wilcox radix_tree_load_root(root, &node, &maxindex); 1305fb969909SRoss Zwisler BUG_ON(index > maxindex); 13061da177e4SLinus Torvalds 1307b194d16cSMatthew Wilcox while (radix_tree_is_internal_node(node)) { 1308fb969909SRoss Zwisler unsigned offset; 13091da177e4SLinus Torvalds 13104dd6c098SMatthew Wilcox parent = entry_to_node(node); 13119e85d811SMatthew Wilcox offset = radix_tree_descend(parent, &node, index); 1312fb969909SRoss Zwisler BUG_ON(!node); 1313fb969909SRoss Zwisler 1314fb969909SRoss Zwisler if (!tag_get(parent, tag, offset)) 1315fb969909SRoss Zwisler tag_set(parent, tag, offset); 13161da177e4SLinus Torvalds } 13171da177e4SLinus Torvalds 1318612d6c19SNick Piggin /* set the root's tag bit */ 1319fb969909SRoss Zwisler if (!root_tag_get(root, tag)) 1320612d6c19SNick Piggin root_tag_set(root, tag); 1321612d6c19SNick Piggin 1322fb969909SRoss Zwisler return node; 13231da177e4SLinus Torvalds } 13241da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_tag_set); 13251da177e4SLinus Torvalds 132630b888baSMatthew Wilcox /** 132730b888baSMatthew Wilcox * radix_tree_iter_tag_set - set a tag on the current iterator entry 132830b888baSMatthew Wilcox * @root: radix 
tree root 132930b888baSMatthew Wilcox * @iter: iterator state 133030b888baSMatthew Wilcox * @tag: tag to set 133130b888baSMatthew Wilcox */ 133230b888baSMatthew Wilcox void radix_tree_iter_tag_set(struct radix_tree_root *root, 133330b888baSMatthew Wilcox const struct radix_tree_iter *iter, unsigned int tag) 133430b888baSMatthew Wilcox { 133530b888baSMatthew Wilcox node_tag_set(root, iter->node, tag, iter_offset(iter)); 133630b888baSMatthew Wilcox } 133730b888baSMatthew Wilcox 1338d604c324SMatthew Wilcox static void node_tag_clear(struct radix_tree_root *root, 1339d604c324SMatthew Wilcox struct radix_tree_node *node, 1340d604c324SMatthew Wilcox unsigned int tag, unsigned int offset) 1341d604c324SMatthew Wilcox { 1342d604c324SMatthew Wilcox while (node) { 1343d604c324SMatthew Wilcox if (!tag_get(node, tag, offset)) 1344d604c324SMatthew Wilcox return; 1345d604c324SMatthew Wilcox tag_clear(node, tag, offset); 1346d604c324SMatthew Wilcox if (any_tag_set(node, tag)) 1347d604c324SMatthew Wilcox return; 1348d604c324SMatthew Wilcox 1349d604c324SMatthew Wilcox offset = node->offset; 1350d604c324SMatthew Wilcox node = node->parent; 1351d604c324SMatthew Wilcox } 1352d604c324SMatthew Wilcox 1353d604c324SMatthew Wilcox /* clear the root's tag bit */ 1354d604c324SMatthew Wilcox if (root_tag_get(root, tag)) 1355d604c324SMatthew Wilcox root_tag_clear(root, tag); 1356d604c324SMatthew Wilcox } 1357d604c324SMatthew Wilcox 1358268f42deSMatthew Wilcox /** 13591da177e4SLinus Torvalds * radix_tree_tag_clear - clear a tag on a radix tree node 13601da177e4SLinus Torvalds * @root: radix tree root 13611da177e4SLinus Torvalds * @index: index key 13621da177e4SLinus Torvalds * @tag: tag index 13631da177e4SLinus Torvalds * 1364daff89f3SJonathan Corbet * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 13652fcd9005SMatthew Wilcox * corresponding to @index in the radix tree. 
If this causes 13662fcd9005SMatthew Wilcox * the leaf node to have no tags set then clear the tag in the 13671da177e4SLinus Torvalds * next-to-leaf node, etc. 13681da177e4SLinus Torvalds * 13691da177e4SLinus Torvalds * Returns the address of the tagged item on success, else NULL. ie: 13701da177e4SLinus Torvalds * has the same return value and semantics as radix_tree_lookup(). 13711da177e4SLinus Torvalds */ 13721da177e4SLinus Torvalds void *radix_tree_tag_clear(struct radix_tree_root *root, 1373daff89f3SJonathan Corbet unsigned long index, unsigned int tag) 13741da177e4SLinus Torvalds { 137500f47b58SRoss Zwisler struct radix_tree_node *node, *parent; 137600f47b58SRoss Zwisler unsigned long maxindex; 1377e2bdb933SHugh Dickins int uninitialized_var(offset); 13781da177e4SLinus Torvalds 13799e85d811SMatthew Wilcox radix_tree_load_root(root, &node, &maxindex); 138000f47b58SRoss Zwisler if (index > maxindex) 138100f47b58SRoss Zwisler return NULL; 13821da177e4SLinus Torvalds 138300f47b58SRoss Zwisler parent = NULL; 13841da177e4SLinus Torvalds 1385b194d16cSMatthew Wilcox while (radix_tree_is_internal_node(node)) { 13864dd6c098SMatthew Wilcox parent = entry_to_node(node); 13879e85d811SMatthew Wilcox offset = radix_tree_descend(parent, &node, index); 13881da177e4SLinus Torvalds } 13891da177e4SLinus Torvalds 1390d604c324SMatthew Wilcox if (node) 1391d604c324SMatthew Wilcox node_tag_clear(root, parent, tag, offset); 13921da177e4SLinus Torvalds 139300f47b58SRoss Zwisler return node; 13941da177e4SLinus Torvalds } 13951da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_tag_clear); 13961da177e4SLinus Torvalds 13971da177e4SLinus Torvalds /** 139830b888baSMatthew Wilcox * radix_tree_iter_tag_clear - clear a tag on the current iterator entry 139930b888baSMatthew Wilcox * @root: radix tree root 140030b888baSMatthew Wilcox * @iter: iterator state 140130b888baSMatthew Wilcox * @tag: tag to clear 140230b888baSMatthew Wilcox */ 140330b888baSMatthew Wilcox void radix_tree_iter_tag_clear(struct 
radix_tree_root *root, 140430b888baSMatthew Wilcox const struct radix_tree_iter *iter, unsigned int tag) 140530b888baSMatthew Wilcox { 140630b888baSMatthew Wilcox node_tag_clear(root, iter->node, tag, iter_offset(iter)); 140730b888baSMatthew Wilcox } 140830b888baSMatthew Wilcox 140930b888baSMatthew Wilcox /** 14101da177e4SLinus Torvalds * radix_tree_tag_get - get a tag on a radix tree node 14111da177e4SLinus Torvalds * @root: radix tree root 14121da177e4SLinus Torvalds * @index: index key 1413daff89f3SJonathan Corbet * @tag: tag index (< RADIX_TREE_MAX_TAGS) 14141da177e4SLinus Torvalds * 141532605a18SMarcelo Tosatti * Return values: 14161da177e4SLinus Torvalds * 1417612d6c19SNick Piggin * 0: tag not present or not set 1418612d6c19SNick Piggin * 1: tag set 1419ce82653dSDavid Howells * 1420ce82653dSDavid Howells * Note that the return value of this function may not be relied on, even if 1421ce82653dSDavid Howells * the RCU lock is held, unless tag modification and node deletion are excluded 1422ce82653dSDavid Howells * from concurrency. 
 */
int radix_tree_tag_get(const struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	/* Fast path: if the root bit is clear, nothing in the tree is tagged. */
	if (!root_tag_get(root, tag))
		return 0;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return 0;

	/*
	 * Walk down: a clear tag bit at any level proves the entry is not
	 * tagged.  Hitting RADIX_TREE_RETRY (a concurrent split/join in
	 * progress) stops the walk with the tag observed set so far.
	 */
	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);

		if (!tag_get(parent, tag, offset))
			return 0;
		if (node == RADIX_TREE_RETRY)
			break;
	}

	return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);

/* iter->shift only exists when multiorder entries are configured in. */
static inline void __set_iter_shift(struct radix_tree_iter *iter,
					unsigned int shift)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDER
	iter->shift = shift;
#endif
}

/*
 * Construct iter->tags bit-mask from node->tags[tag] array.
 * The mask is shifted so that bit 0 corresponds to @offset; when the
 * chunk straddles two tag words, bits from the following word are
 * spliced in above the remainder of the first word.
 */
static void set_iter_tags(struct radix_tree_iter *iter,
				struct radix_tree_node *node, unsigned offset,
				unsigned tag)
{
	unsigned tag_long = offset / BITS_PER_LONG;
	unsigned tag_bit  = offset % BITS_PER_LONG;

	if (!node) {
		/* Single-entry tree: the one entry is by definition tagged. */
		iter->tags = 1;
		return;
	}

	iter->tags = node->tags[tag][tag_long] >> tag_bit;

	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
		/* Pick tags from next element */
		if (tag_bit)
			iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
		/* Clip chunk size, here only BITS_PER_LONG tags */
		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
	}
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/*
 * Advance @slot past any sibling entries of a multiorder entry, keeping
 * iter->index and iter->tags in step.  Returns the first non-sibling
 * slot (storing its contents in *@nodep), or NULL if the chunk ends.
 */
static void __rcu **skip_siblings(struct radix_tree_node **nodep,
			void __rcu **slot, struct radix_tree_iter *iter)
{
	while (iter->index < iter->next_index) {
		*nodep = rcu_dereference_raw(*slot);
		if (*nodep && !xa_is_sibling(*nodep))
			return slot;
		slot++;
		iter->index = __radix_tree_iter_add(iter, 1);
		iter->tags >>= 1;
	}

	*nodep = NULL;
	return NULL;
}
/*
 * Find the next occupied slot after @slot within the current chunk,
 * descending through internal nodes as needed.  Used by the iterator
 * macros when a slot turns out to contain an internal entry.
 */
void __rcu **__radix_tree_next_slot(void __rcu **slot,
				struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node;

	slot = skip_siblings(&node, slot, iter);

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;
		unsigned long next_index;

		/* Let the caller observe RETRY and restart from the root. */
		if (node == RADIX_TREE_RETRY)
			return slot;
		node = entry_to_node(node);
		iter->node = node;
		iter->shift = node->shift;

		if (flags & RADIX_TREE_ITER_TAGGED) {
			/* Jump straight to the next tagged slot. */
			offset = radix_tree_find_next_bit(node, tag, 0);
			if (offset == RADIX_TREE_MAP_SIZE)
				return NULL;
			slot = &node->slots[offset];
			iter->index = __radix_tree_iter_add(iter, offset);
			set_iter_tags(iter, node, offset, tag);
			node = rcu_dereference_raw(*slot);
		} else {
			/* Linear scan for the next non-empty slot. */
			offset = 0;
			slot = &node->slots[0];
			for (;;) {
				node = rcu_dereference_raw(*slot);
				if (node)
					break;
				slot++;
				offset++;
				if (offset == RADIX_TREE_MAP_SIZE)
					return NULL;
			}
			iter->index = __radix_tree_iter_add(iter, offset);
		}
		/* Contiguous iteration ends at the first gap. */
		if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
			goto none;
		next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
		if (next_index < iter->next_index)
			iter->next_index = next_index;
	}

	return slot;
 none:
	iter->next_index = 0;
	return NULL;
}
EXPORT_SYMBOL(__radix_tree_next_slot);
#else
/* Without multiorder support there are no sibling entries to skip. */
static void __rcu **skip_siblings(struct radix_tree_node **nodep,
			void __rcu **slot, struct radix_tree_iter *iter)
{
	return slot;
}
#endif

/*
 * Terminate the current chunk so that iteration resumes with a fresh
 * radix_tree_next_chunk() call at the slot after @slot.  Used after an
 * operation (e.g. deletion) that may have invalidated the chunk.
 */
void __rcu **radix_tree_iter_resume(void __rcu **slot,
					struct radix_tree_iter *iter)
{
	struct radix_tree_node *node;

	slot++;
	iter->index = __radix_tree_iter_add(iter, 1);
	skip_siblings(&node, slot, iter);
	/* next_index == index makes radix_tree_next_slot() end the chunk. */
	iter->next_index = iter->index;
	iter->tags = 0;
	return NULL;
}
EXPORT_SYMBOL(radix_tree_iter_resume);

/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node, *child;
	unsigned long index, offset, maxindex;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL.  iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition is also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and to forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

 restart:
	radix_tree_load_root(root, &child, &maxindex);
	if (index > maxindex)
		return NULL;
	if (!child)
		return NULL;

	if (!radix_tree_is_internal_node(child)) {
		/* Single-slot tree */
		iter->index = index;
		iter->next_index = maxindex + 1;
		iter->tags = 1;
		iter->node = NULL;
		__set_iter_shift(iter, 0);
		return (void __rcu **)&root->xa_head;
	}

	/* Descend towards @index, stepping over holes along the way. */
	do {
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);

		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!tag_get(node, tag, offset) : !child) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(node, tag,
						offset + 1);
			else
				while (++offset	< RADIX_TREE_MAP_SIZE) {
					void *slot = rcu_dereference_raw(
							node->slots[offset]);
					if (xa_is_sibling(slot))
						continue;
					if (slot)
						break;
				}
			index &= ~node_maxindex(node);
			index += offset << node->shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			/* Node exhausted: restart one level up from the root. */
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if (!child)
			goto restart;
		if (child == RADIX_TREE_RETRY)
			break;
	} while (node->shift && radix_tree_is_internal_node(child));

	/* Update the iterator state */
	iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
	iter->next_index = (index | node_maxindex(node)) + 1;
	iter->node = node;
	__set_iter_shift(iter, node->shift);

	if (flags & RADIX_TREE_ITER_TAGGED)
		set_iter_tags(iter, node, offset, tag);

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);

/**
 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
 * @root:	radix tree root
 * @results:	where the results of the lookup are placed
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 *
 * Performs an index-ascending scan of the tree for present items.  Places
 * them at *@results and returns the number of items which were placed at
 * *@results.
 *
 * The implementation is naive.
 *
 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 * rcu_read_lock. In this case, rather than the returned results being
 * an atomic snapshot of the tree at a single point in time, the
 * semantics of an RCU protected gang lookup are as though multiple
 * radix_tree_lookups have been issued in individual locks, and results
 * stored in 'results'.
16971da177e4SLinus Torvalds */ 16981da177e4SLinus Torvalds unsigned int 169935534c86SMatthew Wilcox radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, 17001da177e4SLinus Torvalds unsigned long first_index, unsigned int max_items) 17011da177e4SLinus Torvalds { 1702cebbd29eSKonstantin Khlebnikov struct radix_tree_iter iter; 1703d7b62727SMatthew Wilcox void __rcu **slot; 1704cebbd29eSKonstantin Khlebnikov unsigned int ret = 0; 17051da177e4SLinus Torvalds 1706cebbd29eSKonstantin Khlebnikov if (unlikely(!max_items)) 17077cf9c2c7SNick Piggin return 0; 17087cf9c2c7SNick Piggin 1709cebbd29eSKonstantin Khlebnikov radix_tree_for_each_slot(slot, root, &iter, first_index) { 171046437f9aSMatthew Wilcox results[ret] = rcu_dereference_raw(*slot); 1711cebbd29eSKonstantin Khlebnikov if (!results[ret]) 171247feff2cSNick Piggin continue; 1713b194d16cSMatthew Wilcox if (radix_tree_is_internal_node(results[ret])) { 171446437f9aSMatthew Wilcox slot = radix_tree_iter_retry(&iter); 171546437f9aSMatthew Wilcox continue; 171646437f9aSMatthew Wilcox } 1717cebbd29eSKonstantin Khlebnikov if (++ret == max_items) 17181da177e4SLinus Torvalds break; 17191da177e4SLinus Torvalds } 17207cf9c2c7SNick Piggin 17211da177e4SLinus Torvalds return ret; 17221da177e4SLinus Torvalds } 17231da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_gang_lookup); 17241da177e4SLinus Torvalds 172547feff2cSNick Piggin /** 17261da177e4SLinus Torvalds * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 17271da177e4SLinus Torvalds * based on a tag 17281da177e4SLinus Torvalds * @root: radix tree root 17291da177e4SLinus Torvalds * @results: where the results of the lookup are placed 17301da177e4SLinus Torvalds * @first_index: start the lookup from this key 17311da177e4SLinus Torvalds * @max_items: place up to this many items at *results 1732daff89f3SJonathan Corbet * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 17331da177e4SLinus Torvalds * 17341da177e4SLinus Torvalds * Performs an 
index-ascending scan of the tree for present items which 17351da177e4SLinus Torvalds * have the tag indexed by @tag set. Places the items at *@results and 17361da177e4SLinus Torvalds * returns the number of items which were placed at *@results. 17371da177e4SLinus Torvalds */ 17381da177e4SLinus Torvalds unsigned int 173935534c86SMatthew Wilcox radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, 1740daff89f3SJonathan Corbet unsigned long first_index, unsigned int max_items, 1741daff89f3SJonathan Corbet unsigned int tag) 17421da177e4SLinus Torvalds { 1743cebbd29eSKonstantin Khlebnikov struct radix_tree_iter iter; 1744d7b62727SMatthew Wilcox void __rcu **slot; 1745cebbd29eSKonstantin Khlebnikov unsigned int ret = 0; 17461da177e4SLinus Torvalds 1747cebbd29eSKonstantin Khlebnikov if (unlikely(!max_items)) 1748612d6c19SNick Piggin return 0; 1749612d6c19SNick Piggin 1750cebbd29eSKonstantin Khlebnikov radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 175146437f9aSMatthew Wilcox results[ret] = rcu_dereference_raw(*slot); 1752cebbd29eSKonstantin Khlebnikov if (!results[ret]) 175347feff2cSNick Piggin continue; 1754b194d16cSMatthew Wilcox if (radix_tree_is_internal_node(results[ret])) { 175546437f9aSMatthew Wilcox slot = radix_tree_iter_retry(&iter); 175646437f9aSMatthew Wilcox continue; 175746437f9aSMatthew Wilcox } 1758cebbd29eSKonstantin Khlebnikov if (++ret == max_items) 17591da177e4SLinus Torvalds break; 17601da177e4SLinus Torvalds } 17617cf9c2c7SNick Piggin 17621da177e4SLinus Torvalds return ret; 17631da177e4SLinus Torvalds } 17641da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 17651da177e4SLinus Torvalds 17661da177e4SLinus Torvalds /** 176747feff2cSNick Piggin * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 176847feff2cSNick Piggin * radix tree based on a tag 176947feff2cSNick Piggin * @root: radix tree root 177047feff2cSNick Piggin * @results: where the results of the lookup are placed 
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 * @tag:	the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set.  Places the slots at *@results and
 * returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
		void __rcu ***results, unsigned long first_index,
		unsigned int max_items, unsigned int tag)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	/* Slots are collected as-is; no retry handling is done here. */
	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = slot;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

/*
 * Clear @slot in @node and fix up the accounting.  For an IDR tree the
 * freed index is marked free via IDR_FREE; otherwise all tags for the
 * slot are cleared.  Returns true if this emptied @node and caused it
 * (and possibly ancestors) to be deleted.
 */
static bool __radix_tree_delete(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot)
{
	void *old = rcu_dereference_raw(*slot);
	/* Value entries are counted in nr_values; adjust by -1 if one. */
	int values = xa_is_value(old) ? -1 : 0;
	unsigned offset = get_slot_offset(node, slot);
	int tag;

	if (is_idr(root))
		node_tag_set(root, node, IDR_FREE, offset);
	else
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			node_tag_clear(root, node, tag, offset);

	replace_slot(slot, NULL, node, -1, values);
	return node && delete_node(root, node);
}

/**
 * radix_tree_iter_delete - delete the entry at this iterator position
 * @root: radix tree root
 * @iter: iterator state
 * @slot: pointer to slot
 *
 * Delete the entry at the position currently pointed to by the iterator.
 * This may result in the current node being freed; if it is, the iterator
 * is advanced so that it will not reference the freed memory.  This
 * function may be called without any locking if there are no other threads
 * which can access this tree.
 */
void radix_tree_iter_delete(struct radix_tree_root *root,
				struct radix_tree_iter *iter, void __rcu **slot)
{
	/* If the node was freed, skip the iterator past this chunk. */
	if (__radix_tree_delete(root, iter->node, slot))
		iter->index = iter->next_index;
}
EXPORT_SYMBOL(radix_tree_iter_delete);

/**
 * radix_tree_delete_item - delete an item from a radix tree
 * @root:	radix tree root
 * @index:	index key
 * @item:	expected item
 *
 * Remove @item at @index from the radix tree rooted at @root.
 *
 * Return: the deleted entry, or %NULL if it was not present
 * or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
			     unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL;
	void __rcu **slot = NULL;
	void *entry;

	entry = __radix_tree_lookup(root, index, &node, &slot);
	if (!slot)
		return NULL;
	/*
	 * For an IDR, a NULL entry whose slot is still marked allocated
	 * (IDR_FREE clear) must still be deleted so the id is released;
	 * otherwise NULL means nothing to do.
	 */
	if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
						get_slot_offset(node, slot))))
		return NULL;

	if (item && entry != item)
		return NULL;

	__radix_tree_delete(root, node, slot);

	return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);

/**
 * radix_tree_delete - delete an entry from a radix tree
 * @root:	radix tree root
 * @index:	index key
 *
 * Remove the entry at @index from the radix tree rooted at @root.
 *
 * Return: The deleted entry, or %NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	/* item == NULL means "delete whatever is there". */
	return radix_tree_delete_item(root, index, NULL);
}
EXPORT_SYMBOL(radix_tree_delete);

/*
 * Clear every tag for @slot in @node, or all root tags when the tree
 * consists only of the root pointer (node == NULL).
 */
void radix_tree_clear_tags(struct radix_tree_root *root,
			   struct radix_tree_node *node,
			   void __rcu **slot)
{
	if (node) {
		unsigned int tag, offset = get_slot_offset(node, slot);
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			node_tag_clear(root, node, tag, offset);
	} else {
		root_tag_clear_all(root);
	}
}

/**
 * radix_tree_tagged - test whether any items in the tree are tagged
 * @root:	radix tree root
 * @tag:	tag to test
 */
int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
{
	/* The root tag bit summarises the whole tree. */
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

/**
 * idr_preload - preload for idr_alloc()
 * @gfp_mask: allocation mask to use for preloading
 *
 * Preallocate memory to use for the next call to idr_alloc().  This function
 * returns with preemption disabled.
It will be enabled by idr_preload_end(). 19180a835c4fSMatthew Wilcox */ 19190a835c4fSMatthew Wilcox void idr_preload(gfp_t gfp_mask) 19200a835c4fSMatthew Wilcox { 1921bc9ae224SEric Dumazet if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) 1922bc9ae224SEric Dumazet preempt_disable(); 19230a835c4fSMatthew Wilcox } 19240a835c4fSMatthew Wilcox EXPORT_SYMBOL(idr_preload); 19250a835c4fSMatthew Wilcox 1926460488c5SMatthew Wilcox void __rcu **idr_get_free(struct radix_tree_root *root, 1927388f79fdSChris Mi struct radix_tree_iter *iter, gfp_t gfp, 1928388f79fdSChris Mi unsigned long max) 19290a835c4fSMatthew Wilcox { 19300a835c4fSMatthew Wilcox struct radix_tree_node *node = NULL, *child; 1931f8d5d0ccSMatthew Wilcox void __rcu **slot = (void __rcu **)&root->xa_head; 19320a835c4fSMatthew Wilcox unsigned long maxindex, start = iter->next_index; 19330a835c4fSMatthew Wilcox unsigned int shift, offset = 0; 19340a835c4fSMatthew Wilcox 19350a835c4fSMatthew Wilcox grow: 19360a835c4fSMatthew Wilcox shift = radix_tree_load_root(root, &child, &maxindex); 19370a835c4fSMatthew Wilcox if (!radix_tree_tagged(root, IDR_FREE)) 19380a835c4fSMatthew Wilcox start = max(start, maxindex + 1); 19390a835c4fSMatthew Wilcox if (start > max) 19400a835c4fSMatthew Wilcox return ERR_PTR(-ENOSPC); 19410a835c4fSMatthew Wilcox 19420a835c4fSMatthew Wilcox if (start > maxindex) { 19430a835c4fSMatthew Wilcox int error = radix_tree_extend(root, gfp, start, shift); 19440a835c4fSMatthew Wilcox if (error < 0) 19450a835c4fSMatthew Wilcox return ERR_PTR(error); 19460a835c4fSMatthew Wilcox shift = error; 1947f8d5d0ccSMatthew Wilcox child = rcu_dereference_raw(root->xa_head); 19480a835c4fSMatthew Wilcox } 194966ee620fSMatthew Wilcox if (start == 0 && shift == 0) 195066ee620fSMatthew Wilcox shift = RADIX_TREE_MAP_SHIFT; 19510a835c4fSMatthew Wilcox 19520a835c4fSMatthew Wilcox while (shift) { 19530a835c4fSMatthew Wilcox shift -= RADIX_TREE_MAP_SHIFT; 19540a835c4fSMatthew Wilcox if (child == NULL) { 
19550a835c4fSMatthew Wilcox /* Have to add a child node. */ 1956d58275bcSMatthew Wilcox child = radix_tree_node_alloc(gfp, node, root, shift, 1957d58275bcSMatthew Wilcox offset, 0, 0); 19580a835c4fSMatthew Wilcox if (!child) 19590a835c4fSMatthew Wilcox return ERR_PTR(-ENOMEM); 19600a835c4fSMatthew Wilcox all_tag_set(child, IDR_FREE); 19610a835c4fSMatthew Wilcox rcu_assign_pointer(*slot, node_to_entry(child)); 19620a835c4fSMatthew Wilcox if (node) 19630a835c4fSMatthew Wilcox node->count++; 19640a835c4fSMatthew Wilcox } else if (!radix_tree_is_internal_node(child)) 19650a835c4fSMatthew Wilcox break; 19660a835c4fSMatthew Wilcox 19670a835c4fSMatthew Wilcox node = entry_to_node(child); 19680a835c4fSMatthew Wilcox offset = radix_tree_descend(node, &child, start); 19690a835c4fSMatthew Wilcox if (!tag_get(node, IDR_FREE, offset)) { 19700a835c4fSMatthew Wilcox offset = radix_tree_find_next_bit(node, IDR_FREE, 19710a835c4fSMatthew Wilcox offset + 1); 19720a835c4fSMatthew Wilcox start = next_index(start, node, offset); 19730a835c4fSMatthew Wilcox if (start > max) 19740a835c4fSMatthew Wilcox return ERR_PTR(-ENOSPC); 19750a835c4fSMatthew Wilcox while (offset == RADIX_TREE_MAP_SIZE) { 19760a835c4fSMatthew Wilcox offset = node->offset + 1; 19770a835c4fSMatthew Wilcox node = node->parent; 19780a835c4fSMatthew Wilcox if (!node) 19790a835c4fSMatthew Wilcox goto grow; 19800a835c4fSMatthew Wilcox shift = node->shift; 19810a835c4fSMatthew Wilcox } 19820a835c4fSMatthew Wilcox child = rcu_dereference_raw(node->slots[offset]); 19830a835c4fSMatthew Wilcox } 19840a835c4fSMatthew Wilcox slot = &node->slots[offset]; 19850a835c4fSMatthew Wilcox } 19860a835c4fSMatthew Wilcox 19870a835c4fSMatthew Wilcox iter->index = start; 19880a835c4fSMatthew Wilcox if (node) 19890a835c4fSMatthew Wilcox iter->next_index = 1 + min(max, (start | node_maxindex(node))); 19900a835c4fSMatthew Wilcox else 19910a835c4fSMatthew Wilcox iter->next_index = 1; 19920a835c4fSMatthew Wilcox iter->node = node; 
19930a835c4fSMatthew Wilcox __set_iter_shift(iter, shift); 19940a835c4fSMatthew Wilcox set_iter_tags(iter, node, offset, IDR_FREE); 19950a835c4fSMatthew Wilcox 19960a835c4fSMatthew Wilcox return slot; 19970a835c4fSMatthew Wilcox } 19980a835c4fSMatthew Wilcox 19990a835c4fSMatthew Wilcox /** 20000a835c4fSMatthew Wilcox * idr_destroy - release all internal memory from an IDR 20010a835c4fSMatthew Wilcox * @idr: idr handle 20020a835c4fSMatthew Wilcox * 20030a835c4fSMatthew Wilcox * After this function is called, the IDR is empty, and may be reused or 20040a835c4fSMatthew Wilcox * the data structure containing it may be freed. 20050a835c4fSMatthew Wilcox * 20060a835c4fSMatthew Wilcox * A typical clean-up sequence for objects stored in an idr tree will use 20070a835c4fSMatthew Wilcox * idr_for_each() to free all objects, if necessary, then idr_destroy() to 20080a835c4fSMatthew Wilcox * free the memory used to keep track of those objects. 20090a835c4fSMatthew Wilcox */ 20100a835c4fSMatthew Wilcox void idr_destroy(struct idr *idr) 20110a835c4fSMatthew Wilcox { 2012f8d5d0ccSMatthew Wilcox struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); 20130a835c4fSMatthew Wilcox if (radix_tree_is_internal_node(node)) 20140a835c4fSMatthew Wilcox radix_tree_free_nodes(node); 2015f8d5d0ccSMatthew Wilcox idr->idr_rt.xa_head = NULL; 20160a835c4fSMatthew Wilcox root_tag_set(&idr->idr_rt, IDR_FREE); 20170a835c4fSMatthew Wilcox } 20180a835c4fSMatthew Wilcox EXPORT_SYMBOL(idr_destroy); 20190a835c4fSMatthew Wilcox 20201da177e4SLinus Torvalds static void 2021449dd698SJohannes Weiner radix_tree_node_ctor(void *arg) 20221da177e4SLinus Torvalds { 2023449dd698SJohannes Weiner struct radix_tree_node *node = arg; 2024449dd698SJohannes Weiner 2025449dd698SJohannes Weiner memset(node, 0, sizeof(*node)); 2026449dd698SJohannes Weiner INIT_LIST_HEAD(&node->private_list); 20271da177e4SLinus Torvalds } 20281da177e4SLinus Torvalds 2029c78c66d1SKirill A. 
Shutemov static __init unsigned long __maxindex(unsigned int height) 2030c78c66d1SKirill A. Shutemov { 2031c78c66d1SKirill A. Shutemov unsigned int width = height * RADIX_TREE_MAP_SHIFT; 2032c78c66d1SKirill A. Shutemov int shift = RADIX_TREE_INDEX_BITS - width; 2033c78c66d1SKirill A. Shutemov 2034c78c66d1SKirill A. Shutemov if (shift < 0) 2035c78c66d1SKirill A. Shutemov return ~0UL; 2036c78c66d1SKirill A. Shutemov if (shift >= BITS_PER_LONG) 2037c78c66d1SKirill A. Shutemov return 0UL; 2038c78c66d1SKirill A. Shutemov return ~0UL >> shift; 2039c78c66d1SKirill A. Shutemov } 2040c78c66d1SKirill A. Shutemov 2041c78c66d1SKirill A. Shutemov static __init void radix_tree_init_maxnodes(void) 2042c78c66d1SKirill A. Shutemov { 2043c78c66d1SKirill A. Shutemov unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1]; 2044c78c66d1SKirill A. Shutemov unsigned int i, j; 2045c78c66d1SKirill A. Shutemov 2046c78c66d1SKirill A. Shutemov for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 2047c78c66d1SKirill A. Shutemov height_to_maxindex[i] = __maxindex(i); 2048c78c66d1SKirill A. Shutemov for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) { 2049c78c66d1SKirill A. Shutemov for (j = i; j > 0; j--) 2050c78c66d1SKirill A. Shutemov height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1; 2051c78c66d1SKirill A. Shutemov } 2052c78c66d1SKirill A. Shutemov } 2053c78c66d1SKirill A. Shutemov 2054d544abd5SSebastian Andrzej Siewior static int radix_tree_cpu_dead(unsigned int cpu) 20551da177e4SLinus Torvalds { 20561da177e4SLinus Torvalds struct radix_tree_preload *rtp; 20579d2a8da0SKirill A. Shutemov struct radix_tree_node *node; 20581da177e4SLinus Torvalds 20592fcd9005SMatthew Wilcox /* Free per-cpu pool of preloaded nodes */ 20601da177e4SLinus Torvalds rtp = &per_cpu(radix_tree_preloads, cpu); 20611da177e4SLinus Torvalds while (rtp->nr) { 20629d2a8da0SKirill A. Shutemov node = rtp->nodes; 20631293d5c5SMatthew Wilcox rtp->nodes = node->parent; 20649d2a8da0SKirill A. 
Shutemov kmem_cache_free(radix_tree_node_cachep, node); 20651da177e4SLinus Torvalds rtp->nr--; 20661da177e4SLinus Torvalds } 2067d544abd5SSebastian Andrzej Siewior return 0; 20681da177e4SLinus Torvalds } 20691da177e4SLinus Torvalds 20701da177e4SLinus Torvalds void __init radix_tree_init(void) 20711da177e4SLinus Torvalds { 2072d544abd5SSebastian Andrzej Siewior int ret; 20737e784422SMichal Hocko 20747e784422SMichal Hocko BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); 2075fa290cdaSMatthew Wilcox BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); 207602c02bf1SMatthew Wilcox BUILD_BUG_ON(XA_CHUNK_SIZE > 255); 20771da177e4SLinus Torvalds radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 20781da177e4SLinus Torvalds sizeof(struct radix_tree_node), 0, 2079488514d1SChristoph Lameter SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 2080488514d1SChristoph Lameter radix_tree_node_ctor); 2081c78c66d1SKirill A. Shutemov radix_tree_init_maxnodes(); 2082d544abd5SSebastian Andrzej Siewior ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", 2083d544abd5SSebastian Andrzej Siewior NULL, radix_tree_cpu_dead); 2084d544abd5SSebastian Andrzej Siewior WARN_ON(ret < 0); 20851da177e4SLinus Torvalds } 2086