xref: /openbmc/linux/lib/radix-tree.c (revision d59070d1)
1de6cc651SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * Copyright (C) 2001 Momchil Velikov
41da177e4SLinus Torvalds  * Portions Copyright (C) 2001 Christoph Hellwig
5cde53535SChristoph Lameter  * Copyright (C) 2005 SGI, Christoph Lameter
67cf9c2c7SNick Piggin  * Copyright (C) 2006 Nick Piggin
778c1d784SKonstantin Khlebnikov  * Copyright (C) 2012 Konstantin Khlebnikov
86b053b8eSMatthew Wilcox  * Copyright (C) 2016 Intel, Matthew Wilcox
96b053b8eSMatthew Wilcox  * Copyright (C) 2016 Intel, Ross Zwisler
101da177e4SLinus Torvalds  */
111da177e4SLinus Torvalds 
120a835c4fSMatthew Wilcox #include <linux/bitmap.h>
130a835c4fSMatthew Wilcox #include <linux/bitops.h>
14460488c5SMatthew Wilcox #include <linux/bug.h>
15e157b555SMatthew Wilcox #include <linux/cpu.h>
161da177e4SLinus Torvalds #include <linux/errno.h>
170a835c4fSMatthew Wilcox #include <linux/export.h>
180a835c4fSMatthew Wilcox #include <linux/idr.h>
191da177e4SLinus Torvalds #include <linux/init.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
21ce80b067SCatalin Marinas #include <linux/kmemleak.h>
220a835c4fSMatthew Wilcox #include <linux/percpu.h>
2392cf2118SFrederic Weisbecker #include <linux/preempt.h>		/* in_interrupt() */
240a835c4fSMatthew Wilcox #include <linux/radix-tree.h>
250a835c4fSMatthew Wilcox #include <linux/rcupdate.h>
260a835c4fSMatthew Wilcox #include <linux/slab.h>
270a835c4fSMatthew Wilcox #include <linux/string.h>
2802c02bf1SMatthew Wilcox #include <linux/xarray.h>
291da177e4SLinus Torvalds 
30*bde1597dSArnd Bergmann #include "radix-tree.h"
31*bde1597dSArnd Bergmann 
/*
 * Radix tree node cache: the slab cache from which all radix_tree_node
 * structures are allocated (see radix_tree_node_alloc() below).
 */
struct kmem_cache *radix_tree_node_cachep;
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds /*
3855368052SNick Piggin  * The radix tree is variable-height, so an insert operation not only has
3955368052SNick Piggin  * to build the branch to its corresponding item, it also has to build the
4055368052SNick Piggin  * branch to existing items if the size has to be increased (by
4155368052SNick Piggin  * radix_tree_extend).
4255368052SNick Piggin  *
4355368052SNick Piggin  * The worst case is a zero height tree with just a single item at index 0,
4455368052SNick Piggin  * and then inserting an item at index ULONG_MAX. This requires 2 new branches
4555368052SNick Piggin  * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
4655368052SNick Piggin  * Hence:
4755368052SNick Piggin  */
4855368052SNick Piggin #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
4955368052SNick Piggin 
5055368052SNick Piggin /*
510a835c4fSMatthew Wilcox  * The IDR does not have to be as high as the radix tree since it uses
520a835c4fSMatthew Wilcox  * signed integers, not unsigned longs.
530a835c4fSMatthew Wilcox  */
540a835c4fSMatthew Wilcox #define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
550a835c4fSMatthew Wilcox #define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
560a835c4fSMatthew Wilcox 						RADIX_TREE_MAP_SHIFT))
570a835c4fSMatthew Wilcox #define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)
580a835c4fSMatthew Wilcox 
/*
 * Per-cpu pool of preloaded nodes, refilled by __radix_tree_preload() and
 * consumed by radix_tree_node_alloc().  ->lock serialises access to the
 * local CPU's pool.
 */
DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = {
	.lock = INIT_LOCAL_LOCK(lock),
};
EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads);
661da177e4SLinus Torvalds 
entry_to_node(void * ptr)67148deab2SMatthew Wilcox static inline struct radix_tree_node *entry_to_node(void *ptr)
68148deab2SMatthew Wilcox {
69148deab2SMatthew Wilcox 	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
70148deab2SMatthew Wilcox }
71148deab2SMatthew Wilcox 
node_to_entry(void * ptr)72a4db4dceSMatthew Wilcox static inline void *node_to_entry(void *ptr)
7327d20fddSNick Piggin {
7430ff46ccSMatthew Wilcox 	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
7527d20fddSNick Piggin }
7627d20fddSNick Piggin 
7702c02bf1SMatthew Wilcox #define RADIX_TREE_RETRY	XA_RETRY_ENTRY
78db050f29SMatthew Wilcox 
79d7b62727SMatthew Wilcox static inline unsigned long
get_slot_offset(const struct radix_tree_node * parent,void __rcu ** slot)80d7b62727SMatthew Wilcox get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
81db050f29SMatthew Wilcox {
8276f070b4SMatthew Wilcox 	return parent ? slot - parent->slots : 0;
83db050f29SMatthew Wilcox }
84db050f29SMatthew Wilcox 
/*
 * Work out which slot of @parent covers @index and load that slot's
 * contents into *@nodep (the entry may itself be an internal-node entry).
 * Returns the slot offset.  Uses rcu_dereference_raw(): callers are
 * presumably expected to provide their own RCU or lock protection.
 */
static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
			struct radix_tree_node **nodep, unsigned long index)
{
	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

	*nodep = (void *)entry;
	return offset;
}
94db050f29SMatthew Wilcox 
root_gfp_mask(const struct radix_tree_root * root)9535534c86SMatthew Wilcox static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
96612d6c19SNick Piggin {
97f8d5d0ccSMatthew Wilcox 	return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
98612d6c19SNick Piggin }
99612d6c19SNick Piggin 
tag_set(struct radix_tree_node * node,unsigned int tag,int offset)100643b52b9SNick Piggin static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
101643b52b9SNick Piggin 		int offset)
102643b52b9SNick Piggin {
103643b52b9SNick Piggin 	__set_bit(offset, node->tags[tag]);
104643b52b9SNick Piggin }
105643b52b9SNick Piggin 
tag_clear(struct radix_tree_node * node,unsigned int tag,int offset)106643b52b9SNick Piggin static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
107643b52b9SNick Piggin 		int offset)
108643b52b9SNick Piggin {
109643b52b9SNick Piggin 	__clear_bit(offset, node->tags[tag]);
110643b52b9SNick Piggin }
111643b52b9SNick Piggin 
tag_get(const struct radix_tree_node * node,unsigned int tag,int offset)11235534c86SMatthew Wilcox static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
113643b52b9SNick Piggin 		int offset)
114643b52b9SNick Piggin {
115643b52b9SNick Piggin 	return test_bit(offset, node->tags[tag]);
116643b52b9SNick Piggin }
117643b52b9SNick Piggin 
root_tag_set(struct radix_tree_root * root,unsigned tag)11835534c86SMatthew Wilcox static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
119643b52b9SNick Piggin {
120f8d5d0ccSMatthew Wilcox 	root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
121643b52b9SNick Piggin }
122643b52b9SNick Piggin 
root_tag_clear(struct radix_tree_root * root,unsigned tag)1232fcd9005SMatthew Wilcox static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
124643b52b9SNick Piggin {
125f8d5d0ccSMatthew Wilcox 	root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
126643b52b9SNick Piggin }
127643b52b9SNick Piggin 
root_tag_clear_all(struct radix_tree_root * root)128643b52b9SNick Piggin static inline void root_tag_clear_all(struct radix_tree_root *root)
129643b52b9SNick Piggin {
130f8d5d0ccSMatthew Wilcox 	root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
131643b52b9SNick Piggin }
132643b52b9SNick Piggin 
root_tag_get(const struct radix_tree_root * root,unsigned tag)13335534c86SMatthew Wilcox static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
134643b52b9SNick Piggin {
135f8d5d0ccSMatthew Wilcox 	return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
136643b52b9SNick Piggin }
137643b52b9SNick Piggin 
root_tags_get(const struct radix_tree_root * root)13835534c86SMatthew Wilcox static inline unsigned root_tags_get(const struct radix_tree_root *root)
1397b60e9adSMatthew Wilcox {
140f8d5d0ccSMatthew Wilcox 	return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
1410a835c4fSMatthew Wilcox }
1420a835c4fSMatthew Wilcox 
is_idr(const struct radix_tree_root * root)1430a835c4fSMatthew Wilcox static inline bool is_idr(const struct radix_tree_root *root)
1440a835c4fSMatthew Wilcox {
145f8d5d0ccSMatthew Wilcox 	return !!(root->xa_flags & ROOT_IS_IDR);
1467b60e9adSMatthew Wilcox }
1477b60e9adSMatthew Wilcox 
148643b52b9SNick Piggin /*
149643b52b9SNick Piggin  * Returns 1 if any slot in the node has this tag set.
150643b52b9SNick Piggin  * Otherwise returns 0.
151643b52b9SNick Piggin  */
any_tag_set(const struct radix_tree_node * node,unsigned int tag)15235534c86SMatthew Wilcox static inline int any_tag_set(const struct radix_tree_node *node,
15335534c86SMatthew Wilcox 							unsigned int tag)
154643b52b9SNick Piggin {
1552fcd9005SMatthew Wilcox 	unsigned idx;
156643b52b9SNick Piggin 	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
157643b52b9SNick Piggin 		if (node->tags[tag][idx])
158643b52b9SNick Piggin 			return 1;
159643b52b9SNick Piggin 	}
160643b52b9SNick Piggin 	return 0;
161643b52b9SNick Piggin }
16278c1d784SKonstantin Khlebnikov 
all_tag_set(struct radix_tree_node * node,unsigned int tag)1630a835c4fSMatthew Wilcox static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
1640a835c4fSMatthew Wilcox {
1650a835c4fSMatthew Wilcox 	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
1660a835c4fSMatthew Wilcox }
1670a835c4fSMatthew Wilcox 
/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @node: where to begin the search
 * @tag: the tag index
 * @offset: the bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or size if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
			 unsigned long offset)
{
	const unsigned long *addr = node->tags[tag];

	if (offset < RADIX_TREE_MAP_SIZE) {
		unsigned long tmp;

		/* Check the word containing @offset, masking off lower bits. */
		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		/* Round @offset up to a word boundary, then scan whole words. */
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < RADIX_TREE_MAP_SIZE) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return RADIX_TREE_MAP_SIZE;
}
20278c1d784SKonstantin Khlebnikov 
iter_offset(const struct radix_tree_iter * iter)203268f42deSMatthew Wilcox static unsigned int iter_offset(const struct radix_tree_iter *iter)
204268f42deSMatthew Wilcox {
2053a08cd52SMatthew Wilcox 	return iter->index & RADIX_TREE_MAP_MASK;
206268f42deSMatthew Wilcox }
207268f42deSMatthew Wilcox 
208218ed750SMatthew Wilcox /*
209218ed750SMatthew Wilcox  * The maximum index which can be stored in a radix tree
210218ed750SMatthew Wilcox  */
shift_maxindex(unsigned int shift)211218ed750SMatthew Wilcox static inline unsigned long shift_maxindex(unsigned int shift)
212218ed750SMatthew Wilcox {
213218ed750SMatthew Wilcox 	return (RADIX_TREE_MAP_SIZE << shift) - 1;
214218ed750SMatthew Wilcox }
215218ed750SMatthew Wilcox 
node_maxindex(const struct radix_tree_node * node)21635534c86SMatthew Wilcox static inline unsigned long node_maxindex(const struct radix_tree_node *node)
217218ed750SMatthew Wilcox {
218218ed750SMatthew Wilcox 	return shift_maxindex(node->shift);
219218ed750SMatthew Wilcox }
220218ed750SMatthew Wilcox 
next_index(unsigned long index,const struct radix_tree_node * node,unsigned long offset)2210a835c4fSMatthew Wilcox static unsigned long next_index(unsigned long index,
2220a835c4fSMatthew Wilcox 				const struct radix_tree_node *node,
2230a835c4fSMatthew Wilcox 				unsigned long offset)
2240a835c4fSMatthew Wilcox {
2250a835c4fSMatthew Wilcox 	return (index & ~node_maxindex(node)) + (offset << node->shift);
2260a835c4fSMatthew Wilcox }
2270a835c4fSMatthew Wilcox 
/*
 * Allocate and initialise a radix_tree_node, drawing from the per-CPU
 * preload pool when the slab cannot satisfy a non-blocking allocation.
 *
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
			struct radix_tree_root *root,
			unsigned int shift, unsigned int offset,
			unsigned int count, unsigned int nr_values)
{
	struct radix_tree_node *ret = NULL;

	/*
	 * Preload code isn't irq safe and it doesn't make sense to use
	 * preloading during an interrupt anyway as all the allocations have
	 * to be atomic. So just do normal allocation when in interrupt.
	 */
	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
		struct radix_tree_preload *rtp;

		/*
		 * Even if the caller has preloaded, try to allocate from the
		 * cache first for the new node to get accounted to the memory
		 * cgroup.
		 */
		ret = kmem_cache_alloc(radix_tree_node_cachep,
				       gfp_mask | __GFP_NOWARN);
		if (ret)
			goto out;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr) {
			/* Pop the head of the pool; ->parent links the list. */
			ret = rtp->nodes;
			rtp->nodes = ret->parent;
			rtp->nr--;
		}
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(ret);
		goto out;
	}
	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
	BUG_ON(radix_tree_is_internal_node(ret));
	if (ret) {
		ret->shift = shift;
		ret->offset = offset;
		ret->count = count;
		ret->nr_values = nr_values;
		ret->parent = parent;
		ret->array = root;
	}
	return ret;
}
2891da177e4SLinus Torvalds 
/*
 * RCU callback: return @head's node to the slab cache after the grace
 * period, once no lockless reader can still be looking at it.
 * Non-static: presumably shared with other radix-tree/IDR code — see
 * the local "radix-tree.h" header.
 */
void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);

	/*
	 * Must only free zeroed nodes into the slab.  We can be left with
	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
	 * and tags here.
	 */
	memset(node->slots, 0, sizeof(node->slots));
	memset(node->tags, 0, sizeof(node->tags));
	INIT_LIST_HEAD(&node->private_list);

	kmem_cache_free(radix_tree_node_cachep, node);
}
3067cf9c2c7SNick Piggin 
/*
 * Free a node via RCU so that its memory is not reused until a grace
 * period has elapsed; concurrent lockless readers may still inspect it.
 */
static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}
3121da177e4SLinus Torvalds 
/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	/*
	 * Nodes preloaded by one cgroup can be used by another cgroup, so
	 * they should never be accounted to any particular memory cgroup.
	 */
	gfp_mask &= ~__GFP_ACCOUNT;

	local_lock(&radix_tree_preloads.lock);
	rtp = this_cpu_ptr(&radix_tree_preloads);
	while (rtp->nr < nr) {
		/* Drop the local lock: the allocation below may sleep. */
		local_unlock(&radix_tree_preloads.lock);
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		local_lock(&radix_tree_preloads.lock);
		/* Re-read the pool pointer: we may have changed CPUs. */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr < nr) {
			/* Push onto the pool's singly-linked ->parent list. */
			node->parent = rtp->nodes;
			rtp->nodes = node;
			rtp->nr++;
		} else {
			kmem_cache_free(radix_tree_node_cachep, node);
		}
	}
	ret = 0;
out:
	return ret;
}
3555e4c0d97SJan Kara 
3565e4c0d97SJan Kara /*
3575e4c0d97SJan Kara  * Load up this CPU's radix_tree_node buffer with sufficient objects to
3585e4c0d97SJan Kara  * ensure that the addition of a single element in the tree cannot fail.  On
3595e4c0d97SJan Kara  * success, return zero, with preemption disabled.  On error, return -ENOMEM
3605e4c0d97SJan Kara  * with preemption not disabled.
3615e4c0d97SJan Kara  *
3625e4c0d97SJan Kara  * To make use of this facility, the radix tree must be initialised without
363d0164adcSMel Gorman  * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
3645e4c0d97SJan Kara  */
radix_tree_preload(gfp_t gfp_mask)3655e4c0d97SJan Kara int radix_tree_preload(gfp_t gfp_mask)
3665e4c0d97SJan Kara {
3675e4c0d97SJan Kara 	/* Warn on non-sensical use... */
368d0164adcSMel Gorman 	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
369c78c66d1SKirill A. Shutemov 	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
3705e4c0d97SJan Kara }
371d7f0923dSDavid Chinner EXPORT_SYMBOL(radix_tree_preload);
3721da177e4SLinus Torvalds 
3736e954b9eSNick Piggin /*
3745e4c0d97SJan Kara  * The same as above function, except we don't guarantee preloading happens.
3755e4c0d97SJan Kara  * We do it, if we decide it helps. On success, return zero with preemption
3765e4c0d97SJan Kara  * disabled. On error, return -ENOMEM with preemption not disabled.
3775e4c0d97SJan Kara  */
radix_tree_maybe_preload(gfp_t gfp_mask)3785e4c0d97SJan Kara int radix_tree_maybe_preload(gfp_t gfp_mask)
3795e4c0d97SJan Kara {
380d0164adcSMel Gorman 	if (gfpflags_allow_blocking(gfp_mask))
381c78c66d1SKirill A. Shutemov 		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
3825e4c0d97SJan Kara 	/* Preloading doesn't help anything with this gfp mask, skip it */
383cfa6705dSSebastian Andrzej Siewior 	local_lock(&radix_tree_preloads.lock);
3845e4c0d97SJan Kara 	return 0;
3855e4c0d97SJan Kara }
3865e4c0d97SJan Kara EXPORT_SYMBOL(radix_tree_maybe_preload);
3875e4c0d97SJan Kara 
/*
 * Read the root of the tree.  Stores the top-level entry in *@nodep and
 * the maximum index the current tree height can hold in *@maxindex.
 * Returns the tree's total shift (top node's shift + RADIX_TREE_MAP_SHIFT),
 * or 0 for an empty tree or one whose head is a direct (non-internal) entry.
 */
static unsigned radix_tree_load_root(const struct radix_tree_root *root,
		struct radix_tree_node **nodep, unsigned long *maxindex)
{
	struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);

	*nodep = node;

	if (likely(radix_tree_is_internal_node(node))) {
		node = entry_to_node(node);
		*maxindex = node_maxindex(node);
		return node->shift + RADIX_TREE_MAP_SHIFT;
	}

	*maxindex = 0;
	return 0;
}
4041456a439SMatthew Wilcox 
/*
 *	Extend a radix tree so it can store key @index: grow the tree upwards
 *	by inserting new top-level nodes until @index fits.  Returns the new
 *	total shift of the tree on success, or -ENOMEM.
 */
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
				unsigned long index, unsigned int shift)
{
	void *entry;
	unsigned int maxshift;
	int tag;

	/* Figure out what the shift should be.  */
	maxshift = shift;
	while (index > shift_maxindex(maxshift))
		maxshift += RADIX_TREE_MAP_SHIFT;

	/* An empty tree (modulo the IDR_FREE case) needs no new nodes. */
	entry = rcu_dereference_raw(root->xa_head);
	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
		goto out;

	do {
		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
							root, shift, 0, 1, 0);
		if (!node)
			return -ENOMEM;

		if (is_idr(root)) {
			all_tag_set(node, IDR_FREE);
			if (!root_tag_get(root, IDR_FREE)) {
				tag_clear(node, IDR_FREE, 0);
				root_tag_set(root, IDR_FREE);
			}
		} else {
			/* Propagate the aggregated tag info to the new child */
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
				if (root_tag_get(root, tag))
					tag_set(node, tag, 0);
			}
		}

		BUG_ON(shift > BITS_PER_LONG);
		if (radix_tree_is_internal_node(entry)) {
			entry_to_node(entry)->parent = node;
		} else if (xa_is_value(entry)) {
			/* Moving a value entry root->xa_head to a node */
			node->nr_values = 1;
		}
		/*
		 * entry was already in the radix tree, so we do not need
		 * rcu_assign_pointer here
		 */
		node->slots[0] = (void __rcu *)entry;
		entry = node_to_entry(node);
		rcu_assign_pointer(root->xa_head, entry);
		shift += RADIX_TREE_MAP_SHIFT;
	} while (shift <= maxshift);
out:
	return maxshift + RADIX_TREE_MAP_SHIFT;
}
4631da177e4SLinus Torvalds 
/**
 *	radix_tree_shrink    -    shrink radix tree to minimum height
 *	@root:		radix tree root
 *
 *	Repeatedly collapses a single-child top level into its child.
 *	Returns true if at least one level was removed.
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
	bool shrunk = false;

	for (;;) {
		struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
		struct radix_tree_node *child;

		if (!radix_tree_is_internal_node(node))
			break;
		node = entry_to_node(node);

		/*
		 * The candidate node has more than one child, or its child
		 * is not at the leftmost slot, we cannot shrink.
		 */
		if (node->count != 1)
			break;
		child = rcu_dereference_raw(node->slots[0]);
		if (!child)
			break;

		/*
		 * For an IDR, we must not shrink entry 0 into the root in
		 * case somebody calls idr_replace() with a pointer that
		 * appears to be an internal entry
		 */
		if (!node->shift && is_idr(root))
			break;

		if (radix_tree_is_internal_node(child))
			entry_to_node(child)->parent = NULL;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (node->slots[0]), it will be safe to dereference the new
		 * one (root->xa_head) as far as dependent read barriers go.
		 */
		root->xa_head = (void __rcu *)child;
		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
			root_tag_clear(root, IDR_FREE);

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		node->count = 0;
		if (!radix_tree_is_internal_node(child)) {
			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		shrunk = true;
	}

	return shrunk;
}
5420ac398efSMatthew Wilcox 
/*
 * Free @node if it became empty, walking up and freeing now-empty
 * ancestors, and attempt to shrink the tree when the remaining node is
 * the head.  Returns true if any node was freed.
 */
static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			/* Node still in use; shrink only if it is the head. */
			if (node_to_entry(node) ==
					rcu_dereference_raw(root->xa_head))
				deleted |= radix_tree_shrink(root);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			/*
			 * Shouldn't the tags already have all been cleared
			 * by the caller?
			 */
			if (!is_idr(root))
				root_tag_clear_all(root);
			root->xa_head = NULL;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
}
581f4b109c6SJohannes Weiner 
582f4b109c6SJohannes Weiner /**
583139e5616SJohannes Weiner  *	__radix_tree_create	-	create a slot in a radix tree
5841da177e4SLinus Torvalds  *	@root:		radix tree root
5851da177e4SLinus Torvalds  *	@index:		index key
586139e5616SJohannes Weiner  *	@nodep:		returns node
587139e5616SJohannes Weiner  *	@slotp:		returns slot
5881da177e4SLinus Torvalds  *
589139e5616SJohannes Weiner  *	Create, if necessary, and return the node and slot for an item
590139e5616SJohannes Weiner  *	at position @index in the radix tree @root.
591139e5616SJohannes Weiner  *
592139e5616SJohannes Weiner  *	Until there is more than one item in the tree, no nodes are
593f8d5d0ccSMatthew Wilcox  *	allocated and @root->xa_head is used as a direct slot instead of
594139e5616SJohannes Weiner  *	pointing to a node, in which case *@nodep will be NULL.
595139e5616SJohannes Weiner  *
596139e5616SJohannes Weiner  *	Returns -ENOMEM, or 0 for success.
5971da177e4SLinus Torvalds  */
static int __radix_tree_create(struct radix_tree_root *root,
		unsigned long index, struct radix_tree_node **nodep,
		void __rcu ***slotp)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex;
	unsigned int shift, offset = 0;
	unsigned long max = index;
	gfp_t gfp = root_gfp_mask(root);

	shift = radix_tree_load_root(root, &child, &maxindex);

	/* Make sure the tree is high enough.  */
	if (max > maxindex) {
		int error = radix_tree_extend(root, gfp, max, shift);
		if (error < 0)
			return error;
		/* On success radix_tree_extend() returns the new top shift */
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}

	/* Descend, allocating missing interior nodes along the way */
	while (shift > 0) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node.  */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return -ENOMEM;
			/* Publish the initialised node before linking it in */
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		/* Go a level down */
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);
		slot = &node->slots[offset];
	}

	if (nodep)
		*nodep = node;
	if (slotp)
		*slotp = slot;
	return 0;
}
646139e5616SJohannes Weiner 
647175542f5SMatthew Wilcox /*
648175542f5SMatthew Wilcox  * Free any nodes below this node.  The tree is presumed to not need
649175542f5SMatthew Wilcox  * shrinking, and any user data in the tree is presumed to not need a
650175542f5SMatthew Wilcox  * destructor called on it.  If we need to add a destructor, we can
651175542f5SMatthew Wilcox  * add that functionality later.  Note that we may not clear tags or
652175542f5SMatthew Wilcox  * slots from the tree as an RCU walker may still have a pointer into
653175542f5SMatthew Wilcox  * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
654175542f5SMatthew Wilcox  * but we'll still have to clear those in rcu_free.
655175542f5SMatthew Wilcox  */
static void radix_tree_free_nodes(struct radix_tree_node *node)
{
	unsigned offset = 0;
	struct radix_tree_node *child = entry_to_node(node);

	for (;;) {
		void *entry = rcu_dereference_raw(child->slots[offset]);
		/*
		 * Descend into internal nodes, but not at the bottom level
		 * (child->shift == 0), where node entries are data, not
		 * children.
		 */
		if (xa_is_node(entry) && child->shift) {
			child = entry_to_node(entry);
			offset = 0;
			continue;
		}
		offset++;
		/*
		 * Walked off the end of this node: free it and pop back up
		 * to the parent, resuming just past our own slot.  Stop
		 * once the subtree root itself has been freed.
		 */
		while (offset == RADIX_TREE_MAP_SIZE) {
			struct radix_tree_node *old = child;
			offset = child->offset + 1;
			child = child->parent;
			WARN_ON_ONCE(!list_empty(&old->private_list));
			radix_tree_node_free(old);
			if (old == entry_to_node(node))
				return;
		}
	}
}
680175542f5SMatthew Wilcox 
/*
 * Store @item into an empty @slot, adjusting the accounting on @node.
 * Returns 1 on success, or -EEXIST if the slot is already occupied.
 */
static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item)
{
	if (*slot)
		return -EEXIST;

	rcu_assign_pointer(*slot, item);

	if (!node)
		return 1;

	node->count++;
	if (xa_is_value(item))
		node->nr_values++;
	return 1;
}
694175542f5SMatthew Wilcox 
695139e5616SJohannes Weiner /**
696c95c2d32SRandy Dunlap  *	radix_tree_insert    -    insert into a radix tree
697139e5616SJohannes Weiner  *	@root:		radix tree root
698139e5616SJohannes Weiner  *	@index:		index key
699139e5616SJohannes Weiner  *	@item:		item to insert
700139e5616SJohannes Weiner  *
701139e5616SJohannes Weiner  *	Insert an item into the radix tree at position @index.
702139e5616SJohannes Weiner  */
int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
			void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	/* Users may not store internal entries in the tree */
	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, &node, &slot);
	if (error)
		return error;

	error = insert_entries(node, slot, item);
	if (error < 0)
		return error;

	/* A freshly created slot must not have any tags already set */
	if (node) {
		unsigned offset = get_slot_offset(node, slot);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
		BUG_ON(tag_get(node, 2, offset));
	} else {
		BUG_ON(root_tags_get(root));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);
7321da177e4SLinus Torvalds 
733139e5616SJohannes Weiner /**
734139e5616SJohannes Weiner  *	__radix_tree_lookup	-	lookup an item in a radix tree
735139e5616SJohannes Weiner  *	@root:		radix tree root
736139e5616SJohannes Weiner  *	@index:		index key
737139e5616SJohannes Weiner  *	@nodep:		returns node
738139e5616SJohannes Weiner  *	@slotp:		returns slot
739139e5616SJohannes Weiner  *
740139e5616SJohannes Weiner  *	Lookup and return the item at position @index in the radix
741139e5616SJohannes Weiner  *	tree @root.
742139e5616SJohannes Weiner  *
743139e5616SJohannes Weiner  *	Until there is more than one item in the tree, no nodes are
744f8d5d0ccSMatthew Wilcox  *	allocated and @root->xa_head is used as a direct slot instead of
745139e5616SJohannes Weiner  *	pointing to a node, in which case *@nodep will be NULL.
746a4331366SHans Reiser  */
void *__radix_tree_lookup(const struct radix_tree_root *root,
			  unsigned long index, struct radix_tree_node **nodep,
			  void __rcu ***slotp)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	void __rcu **slot;

 restart:
	parent = NULL;
	slot = (void __rcu **)&root->xa_head;
	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		slot = parent->slots + offset;
		/*
		 * RADIX_TREE_RETRY is left behind when the tree is shrunk
		 * concurrently; restart the lookup from the root.
		 */
		if (node == RADIX_TREE_RETRY)
			goto restart;
		if (parent->shift == 0)
			break;
	}

	if (nodep)
		*nodep = parent;
	if (slotp)
		*slotp = slot;
	return node;
}
780b72b71c6SHuang Shijie 
781b72b71c6SHuang Shijie /**
782b72b71c6SHuang Shijie  *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
783b72b71c6SHuang Shijie  *	@root:		radix tree root
784b72b71c6SHuang Shijie  *	@index:		index key
785b72b71c6SHuang Shijie  *
786b72b71c6SHuang Shijie  *	Returns:  the slot corresponding to the position @index in the
787b72b71c6SHuang Shijie  *	radix tree @root. This is useful for update-if-exists operations.
788b72b71c6SHuang Shijie  *
789b72b71c6SHuang Shijie  *	This function can be called under rcu_read_lock iff the slot is not
790b72b71c6SHuang Shijie  *	modified by radix_tree_replace_slot, otherwise it must be called
791b72b71c6SHuang Shijie  *	exclusive from other writers. Any dereference of the slot must be done
792b72b71c6SHuang Shijie  *	using radix_tree_deref_slot.
793b72b71c6SHuang Shijie  */
radix_tree_lookup_slot(const struct radix_tree_root * root,unsigned long index)794d7b62727SMatthew Wilcox void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
79535534c86SMatthew Wilcox 				unsigned long index)
796b72b71c6SHuang Shijie {
797d7b62727SMatthew Wilcox 	void __rcu **slot;
798139e5616SJohannes Weiner 
799139e5616SJohannes Weiner 	if (!__radix_tree_lookup(root, index, NULL, &slot))
800139e5616SJohannes Weiner 		return NULL;
801139e5616SJohannes Weiner 	return slot;
802a4331366SHans Reiser }
803a4331366SHans Reiser EXPORT_SYMBOL(radix_tree_lookup_slot);
804a4331366SHans Reiser 
8051da177e4SLinus Torvalds /**
8061da177e4SLinus Torvalds  *	radix_tree_lookup    -    perform lookup operation on a radix tree
8071da177e4SLinus Torvalds  *	@root:		radix tree root
8081da177e4SLinus Torvalds  *	@index:		index key
8091da177e4SLinus Torvalds  *
8101da177e4SLinus Torvalds  *	Lookup the item at the position @index in the radix tree @root.
8117cf9c2c7SNick Piggin  *
8127cf9c2c7SNick Piggin  *	This function can be called under rcu_read_lock, however the caller
8137cf9c2c7SNick Piggin  *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
8147cf9c2c7SNick Piggin  *	them safely). No RCU barriers are required to access or modify the
8157cf9c2c7SNick Piggin  *	returned item, however.
8161da177e4SLinus Torvalds  */
radix_tree_lookup(const struct radix_tree_root * root,unsigned long index)81735534c86SMatthew Wilcox void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
8181da177e4SLinus Torvalds {
819139e5616SJohannes Weiner 	return __radix_tree_lookup(root, index, NULL, NULL);
8201da177e4SLinus Torvalds }
8211da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_lookup);
8221da177e4SLinus Torvalds 
/*
 * Store @item in @slot, folding the entry/value deltas into @node's
 * accounting.  A NULL @node (root slot) carries no accounting.
 */
static void replace_slot(void __rcu **slot, void *item,
		struct radix_tree_node *node, int count, int values)
{
	bool adjust = node && (count || values);

	if (adjust) {
		node->count += count;
		node->nr_values += values;
	}

	rcu_assign_pointer(*slot, item);
}
8336d75f366SJohannes Weiner 
node_tag_get(const struct radix_tree_root * root,const struct radix_tree_node * node,unsigned int tag,unsigned int offset)8340a835c4fSMatthew Wilcox static bool node_tag_get(const struct radix_tree_root *root,
8350a835c4fSMatthew Wilcox 				const struct radix_tree_node *node,
8360a835c4fSMatthew Wilcox 				unsigned int tag, unsigned int offset)
837a90eb3a2SMatthew Wilcox {
8380a835c4fSMatthew Wilcox 	if (node)
8390a835c4fSMatthew Wilcox 		return tag_get(node, tag, offset);
8400a835c4fSMatthew Wilcox 	return root_tag_get(root, tag);
841a90eb3a2SMatthew Wilcox }
8420a835c4fSMatthew Wilcox 
8430a835c4fSMatthew Wilcox /*
8440a835c4fSMatthew Wilcox  * IDR users want to be able to store NULL in the tree, so if the slot isn't
8450a835c4fSMatthew Wilcox  * free, don't adjust the count, even if it's transitioning between NULL and
8460a835c4fSMatthew Wilcox  * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
8470a835c4fSMatthew Wilcox  * have empty bits, but it only stores NULL in slots when they're being
8480a835c4fSMatthew Wilcox  * deleted.
8490a835c4fSMatthew Wilcox  */
static int calculate_count(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot,
				void *item, void *old)
{
	int delta = !!item - !!old;

	if (!is_idr(root))
		return delta;

	/* Occupied IDR slots do not change the count, even for NULL
	 * transitions; a free slot gaining any entry counts as one. */
	if (!node_tag_get(root, node, IDR_FREE, get_slot_offset(node, slot)))
		return 0;
	if (!old)
		return 1;
	return delta;
}
864a90eb3a2SMatthew Wilcox 
8651da177e4SLinus Torvalds /**
866f7942430SJohannes Weiner  * __radix_tree_replace		- replace item in a slot
867f7942430SJohannes Weiner  * @root:		radix tree root
868f7942430SJohannes Weiner  * @node:		pointer to tree node
869f7942430SJohannes Weiner  * @slot:		pointer to slot in @node
870f7942430SJohannes Weiner  * @item:		new item to store in the slot.
871f7942430SJohannes Weiner  *
872f7942430SJohannes Weiner  * For use with __radix_tree_lookup().  Caller must hold tree write locked
873f7942430SJohannes Weiner  * across slot lookup and replacement.
874f7942430SJohannes Weiner  */
void __radix_tree_replace(struct radix_tree_root *root,
			  struct radix_tree_node *node,
			  void __rcu **slot, void *item)
{
	void *old = rcu_dereference_raw(*slot);
	/* Net change in value entries and in present entries */
	int values = !!xa_is_value(item) - !!xa_is_value(old);
	int count = calculate_count(root, node, slot, item, old);

	/*
	 * This function supports replacing value entries and
	 * deleting entries, but that needs accounting against the
	 * node unless the slot is root->xa_head.
	 */
	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
			(count || values));
	replace_slot(slot, item, node, count, values);

	if (!node)
		return;

	/* Reap the node (and empty ancestors) if it is now unused */
	delete_node(root, node);
}
897f7942430SJohannes Weiner 
8986d75f366SJohannes Weiner /**
8996d75f366SJohannes Weiner  * radix_tree_replace_slot	- replace item in a slot
9006d75f366SJohannes Weiner  * @root:	radix tree root
9016d75f366SJohannes Weiner  * @slot:	pointer to slot
9026d75f366SJohannes Weiner  * @item:	new item to store in the slot.
9036d75f366SJohannes Weiner  *
9047b8d046fSMatthew Wilcox  * For use with radix_tree_lookup_slot() and
9056d75f366SJohannes Weiner  * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
9066d75f366SJohannes Weiner  * across slot lookup and replacement.
9076d75f366SJohannes Weiner  *
9086d75f366SJohannes Weiner  * NOTE: This cannot be used to switch between non-entries (empty slots),
90901959dfeSMatthew Wilcox  * regular entries, and value entries, as that requires accounting
910f4b109c6SJohannes Weiner  * inside the radix tree node. When switching from one type of entry or
911e157b555SMatthew Wilcox  * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
912e157b555SMatthew Wilcox  * radix_tree_iter_replace().
9136d75f366SJohannes Weiner  */
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void __rcu **slot, void *item)
{
	/* No node pointer available from this interface */
	struct radix_tree_node *node = NULL;

	__radix_tree_replace(root, node, slot, item);
}
EXPORT_SYMBOL(radix_tree_replace_slot);
920f7942430SJohannes Weiner 
921e157b555SMatthew Wilcox /**
922e157b555SMatthew Wilcox  * radix_tree_iter_replace - replace item in a slot
923e157b555SMatthew Wilcox  * @root:	radix tree root
924c95c2d32SRandy Dunlap  * @iter:	iterator state
925e157b555SMatthew Wilcox  * @slot:	pointer to slot
926e157b555SMatthew Wilcox  * @item:	new item to store in the slot.
927e157b555SMatthew Wilcox  *
9282956c664SMatthew Wilcox  * For use with radix_tree_for_each_slot().
9292956c664SMatthew Wilcox  * Caller must hold tree write locked.
930e157b555SMatthew Wilcox  */
void radix_tree_iter_replace(struct radix_tree_root *root,
				const struct radix_tree_iter *iter,
				void __rcu **slot, void *item)
{
	/* The iterator already knows which node holds the slot */
	struct radix_tree_node *node = iter->node;

	__radix_tree_replace(root, node, slot, item);
}
937e157b555SMatthew Wilcox 
node_tag_set(struct radix_tree_root * root,struct radix_tree_node * node,unsigned int tag,unsigned int offset)93830b888baSMatthew Wilcox static void node_tag_set(struct radix_tree_root *root,
93930b888baSMatthew Wilcox 				struct radix_tree_node *node,
94030b888baSMatthew Wilcox 				unsigned int tag, unsigned int offset)
94130b888baSMatthew Wilcox {
94230b888baSMatthew Wilcox 	while (node) {
94330b888baSMatthew Wilcox 		if (tag_get(node, tag, offset))
94430b888baSMatthew Wilcox 			return;
94530b888baSMatthew Wilcox 		tag_set(node, tag, offset);
94630b888baSMatthew Wilcox 		offset = node->offset;
94730b888baSMatthew Wilcox 		node = node->parent;
94830b888baSMatthew Wilcox 	}
94930b888baSMatthew Wilcox 
95030b888baSMatthew Wilcox 	if (!root_tag_get(root, tag))
95130b888baSMatthew Wilcox 		root_tag_set(root, tag);
95230b888baSMatthew Wilcox }
95330b888baSMatthew Wilcox 
9541da177e4SLinus Torvalds /**
9551da177e4SLinus Torvalds  *	radix_tree_tag_set - set a tag on a radix tree node
9561da177e4SLinus Torvalds  *	@root:		radix tree root
9571da177e4SLinus Torvalds  *	@index:		index key
9581da177e4SLinus Torvalds  *	@tag:		tag index
9591da177e4SLinus Torvalds  *
960daff89f3SJonathan Corbet  *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
961daff89f3SJonathan Corbet  *	corresponding to @index in the radix tree.  From
9621da177e4SLinus Torvalds  *	the root all the way down to the leaf node.
9631da177e4SLinus Torvalds  *
9641da177e4SLinus Torvalds  *	Returns the address of the tagged item.  Setting a tag on a not-present
9651da177e4SLinus Torvalds  *	item is a bug.
9661da177e4SLinus Torvalds  */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	radix_tree_load_root(root, &node, &maxindex);
	/* Setting a tag on a not-present item is a bug */
	BUG_ON(index > maxindex);

	/* Set the tag on every node along the path down to the entry */
	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		BUG_ON(!node);

		if (!tag_get(parent, tag, offset))
			tag_set(parent, tag, offset);
	}

	/* set the root's tag bit */
	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_set);
9941da177e4SLinus Torvalds 
node_tag_clear(struct radix_tree_root * root,struct radix_tree_node * node,unsigned int tag,unsigned int offset)995d604c324SMatthew Wilcox static void node_tag_clear(struct radix_tree_root *root,
996d604c324SMatthew Wilcox 				struct radix_tree_node *node,
997d604c324SMatthew Wilcox 				unsigned int tag, unsigned int offset)
998d604c324SMatthew Wilcox {
999d604c324SMatthew Wilcox 	while (node) {
1000d604c324SMatthew Wilcox 		if (!tag_get(node, tag, offset))
1001d604c324SMatthew Wilcox 			return;
1002d604c324SMatthew Wilcox 		tag_clear(node, tag, offset);
1003d604c324SMatthew Wilcox 		if (any_tag_set(node, tag))
1004d604c324SMatthew Wilcox 			return;
1005d604c324SMatthew Wilcox 
1006d604c324SMatthew Wilcox 		offset = node->offset;
1007d604c324SMatthew Wilcox 		node = node->parent;
1008d604c324SMatthew Wilcox 	}
1009d604c324SMatthew Wilcox 
1010d604c324SMatthew Wilcox 	/* clear the root's tag bit */
1011d604c324SMatthew Wilcox 	if (root_tag_get(root, tag))
1012d604c324SMatthew Wilcox 		root_tag_clear(root, tag);
1013d604c324SMatthew Wilcox }
1014d604c324SMatthew Wilcox 
1015268f42deSMatthew Wilcox /**
10161da177e4SLinus Torvalds  *	radix_tree_tag_clear - clear a tag on a radix tree node
10171da177e4SLinus Torvalds  *	@root:		radix tree root
10181da177e4SLinus Torvalds  *	@index:		index key
10191da177e4SLinus Torvalds  *	@tag:		tag index
10201da177e4SLinus Torvalds  *
1021daff89f3SJonathan Corbet  *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
10222fcd9005SMatthew Wilcox  *	corresponding to @index in the radix tree.  If this causes
10232fcd9005SMatthew Wilcox  *	the leaf node to have no tags set then clear the tag in the
10241da177e4SLinus Torvalds  *	next-to-leaf node, etc.
10251da177e4SLinus Torvalds  *
10261da177e4SLinus Torvalds  *	Returns the address of the tagged item on success, else NULL.  ie:
10271da177e4SLinus Torvalds  *	has the same return value and semantics as radix_tree_lookup().
10281da177e4SLinus Torvalds  */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	int offset = 0;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	parent = NULL;

	/* Walk down to the entry, remembering its parent and offset */
	while (radix_tree_is_internal_node(node)) {
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
	}

	/* node_tag_clear() propagates the clearing up towards the root */
	if (node)
		node_tag_clear(root, parent, tag, offset);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_clear);
10531da177e4SLinus Torvalds 
10541da177e4SLinus Torvalds /**
105530b888baSMatthew Wilcox   * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
105630b888baSMatthew Wilcox   * @root: radix tree root
105730b888baSMatthew Wilcox   * @iter: iterator state
105830b888baSMatthew Wilcox   * @tag: tag to clear
105930b888baSMatthew Wilcox   */
radix_tree_iter_tag_clear(struct radix_tree_root * root,const struct radix_tree_iter * iter,unsigned int tag)106030b888baSMatthew Wilcox void radix_tree_iter_tag_clear(struct radix_tree_root *root,
106130b888baSMatthew Wilcox 			const struct radix_tree_iter *iter, unsigned int tag)
106230b888baSMatthew Wilcox {
106330b888baSMatthew Wilcox 	node_tag_clear(root, iter->node, tag, iter_offset(iter));
106430b888baSMatthew Wilcox }
106530b888baSMatthew Wilcox 
106630b888baSMatthew Wilcox /**
10671da177e4SLinus Torvalds  * radix_tree_tag_get - get a tag on a radix tree node
10681da177e4SLinus Torvalds  * @root:		radix tree root
10691da177e4SLinus Torvalds  * @index:		index key
1070daff89f3SJonathan Corbet  * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
10711da177e4SLinus Torvalds  *
107232605a18SMarcelo Tosatti  * Return values:
10731da177e4SLinus Torvalds  *
1074612d6c19SNick Piggin  *  0: tag not present or not set
1075612d6c19SNick Piggin  *  1: tag set
1076ce82653dSDavid Howells  *
1077ce82653dSDavid Howells  * Note that the return value of this function may not be relied on, even if
1078ce82653dSDavid Howells  * the RCU lock is held, unless tag modification and node deletion are excluded
1079ce82653dSDavid Howells  * from concurrency.
10801da177e4SLinus Torvalds  */
int radix_tree_tag_get(const struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	/* Cheap first check: the root tag summarises the whole tree */
	if (!root_tag_get(root, tag))
		return 0;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return 0;

	/* The tag is set iff it is set at every level down the path */
	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);

		if (!tag_get(parent, tag, offset))
			return 0;
		/* Slot holds a retry entry; report the tag as found here */
		if (node == RADIX_TREE_RETRY)
			break;
	}

	return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);
11091da177e4SLinus Torvalds 
1110148deab2SMatthew Wilcox /* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
				struct radix_tree_node *node, unsigned offset,
				unsigned tag)
{
	unsigned tag_long = offset / BITS_PER_LONG;
	unsigned tag_bit  = offset % BITS_PER_LONG;

	/* A NULL node means the single entry stored directly in the root;
	 * it is tagged, or we would not be here. */
	if (!node) {
		iter->tags = 1;
		return;
	}

	/* Shift so that bit 0 of iter->tags corresponds to @offset */
	iter->tags = node->tags[tag][tag_long] >> tag_bit;

	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
		/* Pick tags from next element */
		if (tag_bit)
			iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
		/* Clip chunk size, here only BITS_PER_LONG tags */
		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
	}
}
1135148deab2SMatthew Wilcox 
radix_tree_iter_resume(void __rcu ** slot,struct radix_tree_iter * iter)1136d7b62727SMatthew Wilcox void __rcu **radix_tree_iter_resume(void __rcu **slot,
1137d7b62727SMatthew Wilcox 					struct radix_tree_iter *iter)
1138148deab2SMatthew Wilcox {
1139148deab2SMatthew Wilcox 	iter->index = __radix_tree_iter_add(iter, 1);
1140148deab2SMatthew Wilcox 	iter->next_index = iter->index;
1141148deab2SMatthew Wilcox 	iter->tags = 0;
1142148deab2SMatthew Wilcox 	return NULL;
1143148deab2SMatthew Wilcox }
1144148deab2SMatthew Wilcox EXPORT_SYMBOL(radix_tree_iter_resume);
1145148deab2SMatthew Wilcox 
/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node, *child;
	unsigned long index, offset, maxindex;

	/* Tagged iteration: root tag clear means nothing is tagged at all. */
	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL. iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

 restart:
	radix_tree_load_root(root, &child, &maxindex);
	if (index > maxindex)
		return NULL;
	if (!child)
		return NULL;

	if (!radix_tree_is_internal_node(child)) {
		/* Single-slot tree */
		iter->index = index;
		iter->next_index = maxindex + 1;
		iter->tags = 1;
		iter->node = NULL;
		return (void __rcu **)&root->xa_head;
	}

	/* Descend to the leaf level, searching forward past any holes. */
	do {
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);

		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!tag_get(node, tag, offset) : !child) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			/* Scan this node for the next tagged/present slot. */
			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(node, tag,
						offset + 1);
			else
				while (++offset	< RADIX_TREE_MAP_SIZE) {
					void *slot = rcu_dereference_raw(
							node->slots[offset]);
					if (slot)
						break;
				}
			index &= ~node_maxindex(node);
			index += offset << node->shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			/* Node exhausted: restart the walk at the new index. */
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if (!child)
			goto restart;
		if (child == RADIX_TREE_RETRY)
			break;
	} while (node->shift && radix_tree_is_internal_node(child));

	/* Update the iterator state */
	iter->index = (index &~ node_maxindex(node)) | offset;
	iter->next_index = (index | node_maxindex(node)) + 1;
	iter->node = node;

	if (flags & RADIX_TREE_ITER_TAGGED)
		set_iter_tags(iter, node, offset, tag);

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);
124078c1d784SKonstantin Khlebnikov 
/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	them at *@results and returns the number of items which were placed at
 *	*@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock. In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the
 *	semantics of an RCU protected gang lookup are as though multiple
 *	radix_tree_lookups have been issued in individual locks, and results
 *	stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_internal_node(results[ret])) {
			/* Slot became an internal entry under us: re-read. */
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);
12871da177e4SLinus Torvalds 
/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *	                             based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the items at *@results and
 *	returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_internal_node(results[ret])) {
			/* Slot became an internal entry under us: re-read. */
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
13281da177e4SLinus Torvalds 
/**
 *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *					  radix tree based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the slots at *@results and
 *	returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
		void __rcu ***results, unsigned long first_index,
		unsigned int max_items, unsigned int tag)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	/* Unlike the item lookups, slots are returned without dereferencing. */
	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = slot;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
136347feff2cSNick Piggin 
/*
 * Clear @slot in @node: mark the slot free (IDR tree) or clear all of its
 * tags (regular tree), store NULL while adjusting the node's entry and
 * value counts, then delete @node itself if it became removable.
 *
 * Returns true if @node was deleted from the tree.
 */
static bool __radix_tree_delete(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot)
{
	void *old = rcu_dereference_raw(*slot);
	/* -1 if we are removing a value entry, so counts stay accurate. */
	int values = xa_is_value(old) ? -1 : 0;
	unsigned offset = get_slot_offset(node, slot);
	int tag;

	if (is_idr(root))
		node_tag_set(root, node, IDR_FREE, offset);
	else
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			node_tag_clear(root, node, tag, offset);

	replace_slot(slot, NULL, node, -1, values);
	return node && delete_node(root, node);
}
13810ac398efSMatthew Wilcox 
/**
 * radix_tree_iter_delete - delete the entry at this iterator position
 * @root: radix tree root
 * @iter: iterator state
 * @slot: pointer to slot
 *
 * Delete the entry at the position currently pointed to by the iterator.
 * This may result in the current node being freed; if it is, the iterator
 * is advanced so that it will not reference the freed memory.  This
 * function may be called without any locking if there are no other threads
 * which can access this tree.
 */
void radix_tree_iter_delete(struct radix_tree_root *root,
				struct radix_tree_iter *iter, void __rcu **slot)
{
	/* If the containing node was freed, skip past its whole chunk. */
	if (__radix_tree_delete(root, iter->node, slot))
		iter->index = iter->next_index;
}
EXPORT_SYMBOL(radix_tree_iter_delete);
14010ac398efSMatthew Wilcox 
/**
 * radix_tree_delete_item - delete an item from a radix tree
 * @root: radix tree root
 * @index: index key
 * @item: expected item
 *
 * Remove @item at @index from the radix tree rooted at @root.
 *
 * Return: the deleted entry, or %NULL if it was not present
 * or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
			     unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL;
	void __rcu **slot = NULL;
	void *entry;

	entry = __radix_tree_lookup(root, index, &node, &slot);
	if (!slot)
		return NULL;
	/*
	 * A NULL entry only counts as present in an IDR tree, and only
	 * when the slot's IDR_FREE tag is clear (allocated id storing NULL).
	 */
	if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
						get_slot_offset(node, slot))))
		return NULL;

	/* If the caller specified an expected item, it must match exactly. */
	if (item && entry != item)
		return NULL;

	__radix_tree_delete(root, node, slot);

	return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);
143553c59f26SJohannes Weiner 
/**
 * radix_tree_delete - delete an entry from a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Remove the entry at @index from the radix tree rooted at @root.
 *
 * Return: The deleted entry, or %NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	/* NULL item means "delete whatever is stored at @index". */
	return radix_tree_delete_item(root, index, NULL);
}
EXPORT_SYMBOL(radix_tree_delete);
14501da177e4SLinus Torvalds 
/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 *
 *	Returns non-zero if any entry in the tree has @tag set, by checking
 *	the summary tag kept in the root.
 */
int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);
14611da177e4SLinus Torvalds 
/**
 * idr_preload - preload for idr_alloc()
 * @gfp_mask: allocation mask to use for preloading
 *
 * Preallocate memory to use for the next call to idr_alloc().  This function
 * returns with preemption disabled.  It will be enabled by idr_preload_end().
 */
void idr_preload(gfp_t gfp_mask)
{
	/*
	 * NOTE(review): __radix_tree_preload() appears to return holding the
	 * per-cpu preload lock on success; on failure we take the lock here
	 * ourselves so the "returns with preemption disabled" contract above
	 * holds either way — confirm against __radix_tree_preload().
	 */
	if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
		local_lock(&radix_tree_preloads.lock);
}
EXPORT_SYMBOL(idr_preload);
14750a835c4fSMatthew Wilcox 
/*
 * idr_get_free - find the lowest free (IDR_FREE-tagged) slot at or above
 * iter->next_index, growing the tree as required.
 *
 * Returns the slot pointer on success (with @iter set up to describe the
 * chunk containing it), ERR_PTR(-ENOSPC) if no free id not above @max
 * exists, or ERR_PTR(-ENOMEM) if a node allocation failed.
 */
void __rcu **idr_get_free(struct radix_tree_root *root,
			      struct radix_tree_iter *iter, gfp_t gfp,
			      unsigned long max)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex, start = iter->next_index;
	unsigned int shift, offset = 0;

 grow:
	shift = radix_tree_load_root(root, &child, &maxindex);
	/* No IDR_FREE anywhere: only ids past the current end are free. */
	if (!radix_tree_tagged(root, IDR_FREE))
		start = max(start, maxindex + 1);
	if (start > max)
		return ERR_PTR(-ENOSPC);

	/* Grow the tree until it can hold @start. */
	if (start > maxindex) {
		int error = radix_tree_extend(root, gfp, start, shift);
		if (error < 0)
			return ERR_PTR(error);
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}
	if (start == 0 && shift == 0)
		shift = RADIX_TREE_MAP_SHIFT;

	while (shift) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node.  */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return ERR_PTR(-ENOMEM);
			/* Every slot in a freshly allocated node is free. */
			all_tag_set(child, IDR_FREE);
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, start);
		if (!tag_get(node, IDR_FREE, offset)) {
			/* Slot taken: scan this node for the next free one. */
			offset = radix_tree_find_next_bit(node, IDR_FREE,
							offset + 1);
			start = next_index(start, node, offset);
			if (start > max || start == 0)
				return ERR_PTR(-ENOSPC);
			/* Node exhausted: backtrack towards the root. */
			while (offset == RADIX_TREE_MAP_SIZE) {
				offset = node->offset + 1;
				node = node->parent;
				if (!node)
					goto grow;
				shift = node->shift;
			}
			child = rcu_dereference_raw(node->slots[offset]);
		}
		slot = &node->slots[offset];
	}

	/* Describe the chunk containing the free slot for the caller. */
	iter->index = start;
	if (node)
		iter->next_index = 1 + min(max, (start | node_maxindex(node)));
	else
		iter->next_index = 1;
	iter->node = node;
	set_iter_tags(iter, node, offset, IDR_FREE);

	return slot;
}
15470a835c4fSMatthew Wilcox 
/**
 * idr_destroy - release all internal memory from an IDR
 * @idr: idr handle
 *
 * After this function is called, the IDR is empty, and may be reused or
 * the data structure containing it may be freed.
 *
 * A typical clean-up sequence for objects stored in an idr tree will use
 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
 * free the memory used to keep track of those objects.
 */
void idr_destroy(struct idr *idr)
{
	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
	/* Only internal nodes need freeing; a direct entry is caller-owned. */
	if (radix_tree_is_internal_node(node))
		radix_tree_free_nodes(node);
	idr->idr_rt.xa_head = NULL;
	/* An empty IDR is entirely free again. */
	root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);
15680a835c4fSMatthew Wilcox 
/* Slab constructor: nodes start zeroed with an empty private_list. */
static void
radix_tree_node_ctor(void *arg)
{
	struct radix_tree_node *node = arg;

	memset(node, 0, sizeof(*node));
	INIT_LIST_HEAD(&node->private_list);
}
15771da177e4SLinus Torvalds 
radix_tree_cpu_dead(unsigned int cpu)1578d544abd5SSebastian Andrzej Siewior static int radix_tree_cpu_dead(unsigned int cpu)
15791da177e4SLinus Torvalds {
15801da177e4SLinus Torvalds 	struct radix_tree_preload *rtp;
15819d2a8da0SKirill A. Shutemov 	struct radix_tree_node *node;
15821da177e4SLinus Torvalds 
15832fcd9005SMatthew Wilcox 	/* Free per-cpu pool of preloaded nodes */
15841da177e4SLinus Torvalds 	rtp = &per_cpu(radix_tree_preloads, cpu);
15851da177e4SLinus Torvalds 	while (rtp->nr) {
15869d2a8da0SKirill A. Shutemov 		node = rtp->nodes;
15871293d5c5SMatthew Wilcox 		rtp->nodes = node->parent;
15889d2a8da0SKirill A. Shutemov 		kmem_cache_free(radix_tree_node_cachep, node);
15891da177e4SLinus Torvalds 		rtp->nr--;
15901da177e4SLinus Torvalds 	}
1591d544abd5SSebastian Andrzej Siewior 	return 0;
15921da177e4SLinus Torvalds }
15931da177e4SLinus Torvalds 
/* Boot-time setup: node slab cache plus the CPU-hotplug drain callback. */
void __init radix_tree_init(void)
{
	int ret;

	/* Compile-time checks on the tag/GFP/chunk bit-packing invariants. */
	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
	BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
					NULL, radix_tree_cpu_dead);
	WARN_ON(ret < 0);
}
1609