xref: /openbmc/linux/lib/radix-tree.c (revision eff3860bbfedbac6edac57fb0d7f3a60e860c1c3)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * Copyright (C) 2001 Momchil Velikov
31da177e4SLinus Torvalds  * Portions Copyright (C) 2001 Christoph Hellwig
4cde53535SChristoph Lameter  * Copyright (C) 2005 SGI, Christoph Lameter
57cf9c2c7SNick Piggin  * Copyright (C) 2006 Nick Piggin
678c1d784SKonstantin Khlebnikov  * Copyright (C) 2012 Konstantin Khlebnikov
76b053b8eSMatthew Wilcox  * Copyright (C) 2016 Intel, Matthew Wilcox
86b053b8eSMatthew Wilcox  * Copyright (C) 2016 Intel, Ross Zwisler
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  * This program is free software; you can redistribute it and/or
111da177e4SLinus Torvalds  * modify it under the terms of the GNU General Public License as
121da177e4SLinus Torvalds  * published by the Free Software Foundation; either version 2, or (at
131da177e4SLinus Torvalds  * your option) any later version.
141da177e4SLinus Torvalds  *
151da177e4SLinus Torvalds  * This program is distributed in the hope that it will be useful, but
161da177e4SLinus Torvalds  * WITHOUT ANY WARRANTY; without even the implied warranty of
171da177e4SLinus Torvalds  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
181da177e4SLinus Torvalds  * General Public License for more details.
191da177e4SLinus Torvalds  *
201da177e4SLinus Torvalds  * You should have received a copy of the GNU General Public License
211da177e4SLinus Torvalds  * along with this program; if not, write to the Free Software
221da177e4SLinus Torvalds  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
231da177e4SLinus Torvalds  */
241da177e4SLinus Torvalds 
250a835c4fSMatthew Wilcox #include <linux/bitmap.h>
260a835c4fSMatthew Wilcox #include <linux/bitops.h>
27460488c5SMatthew Wilcox #include <linux/bug.h>
28e157b555SMatthew Wilcox #include <linux/cpu.h>
291da177e4SLinus Torvalds #include <linux/errno.h>
300a835c4fSMatthew Wilcox #include <linux/export.h>
310a835c4fSMatthew Wilcox #include <linux/idr.h>
321da177e4SLinus Torvalds #include <linux/init.h>
331da177e4SLinus Torvalds #include <linux/kernel.h>
34ce80b067SCatalin Marinas #include <linux/kmemleak.h>
350a835c4fSMatthew Wilcox #include <linux/percpu.h>
3692cf2118SFrederic Weisbecker #include <linux/preempt.h>		/* in_interrupt() */
370a835c4fSMatthew Wilcox #include <linux/radix-tree.h>
380a835c4fSMatthew Wilcox #include <linux/rcupdate.h>
390a835c4fSMatthew Wilcox #include <linux/slab.h>
400a835c4fSMatthew Wilcox #include <linux/string.h>
4102c02bf1SMatthew Wilcox #include <linux/xarray.h>
421da177e4SLinus Torvalds 
431da177e4SLinus Torvalds 
4426fb1589SJeff Moyer /*
451da177e4SLinus Torvalds  * Radix tree node cache.
461da177e4SLinus Torvalds  */
4758d6ea30SMatthew Wilcox struct kmem_cache *radix_tree_node_cachep;
481da177e4SLinus Torvalds 
491da177e4SLinus Torvalds /*
5055368052SNick Piggin  * The radix tree is variable-height, so an insert operation not only has
5155368052SNick Piggin  * to build the branch to its corresponding item, it also has to build the
5255368052SNick Piggin  * branch to existing items if the size has to be increased (by
5355368052SNick Piggin  * radix_tree_extend).
5455368052SNick Piggin  *
5555368052SNick Piggin  * The worst case is a zero height tree with just a single item at index 0,
5655368052SNick Piggin  * and then inserting an item at index ULONG_MAX. This requires 2 new branches
5755368052SNick Piggin  * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
5855368052SNick Piggin  * Hence:
5955368052SNick Piggin  */
6055368052SNick Piggin #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
6155368052SNick Piggin 
6255368052SNick Piggin /*
630a835c4fSMatthew Wilcox  * The IDR does not have to be as high as the radix tree since it uses
640a835c4fSMatthew Wilcox  * signed integers, not unsigned longs.
650a835c4fSMatthew Wilcox  */
660a835c4fSMatthew Wilcox #define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
670a835c4fSMatthew Wilcox #define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
680a835c4fSMatthew Wilcox 						RADIX_TREE_MAP_SHIFT))
690a835c4fSMatthew Wilcox #define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)
700a835c4fSMatthew Wilcox 
710a835c4fSMatthew Wilcox /*
727ad3d4d8SMatthew Wilcox  * The IDA is even shorter since it uses a bitmap at the last level.
737ad3d4d8SMatthew Wilcox  */
747ad3d4d8SMatthew Wilcox #define IDA_INDEX_BITS		(8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
757ad3d4d8SMatthew Wilcox #define IDA_MAX_PATH		(DIV_ROUND_UP(IDA_INDEX_BITS, \
767ad3d4d8SMatthew Wilcox 						RADIX_TREE_MAP_SHIFT))
777ad3d4d8SMatthew Wilcox #define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)
787ad3d4d8SMatthew Wilcox 
797ad3d4d8SMatthew Wilcox /*
801da177e4SLinus Torvalds  * Per-cpu pool of preloaded nodes
811da177e4SLinus Torvalds  */
821da177e4SLinus Torvalds struct radix_tree_preload {
832fcd9005SMatthew Wilcox 	unsigned nr;
841293d5c5SMatthew Wilcox 	/* nodes->parent points to next preallocated node */
859d2a8da0SKirill A. Shutemov 	struct radix_tree_node *nodes;
861da177e4SLinus Torvalds };
878cef7d57SHarvey Harrison static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
881da177e4SLinus Torvalds 
89148deab2SMatthew Wilcox static inline struct radix_tree_node *entry_to_node(void *ptr)
90148deab2SMatthew Wilcox {
91148deab2SMatthew Wilcox 	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
92148deab2SMatthew Wilcox }
93148deab2SMatthew Wilcox 
94a4db4dceSMatthew Wilcox static inline void *node_to_entry(void *ptr)
9527d20fddSNick Piggin {
9630ff46ccSMatthew Wilcox 	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
9727d20fddSNick Piggin }
9827d20fddSNick Piggin 
9902c02bf1SMatthew Wilcox #define RADIX_TREE_RETRY	XA_RETRY_ENTRY
100db050f29SMatthew Wilcox 
101d7b62727SMatthew Wilcox static inline unsigned long
102d7b62727SMatthew Wilcox get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
103db050f29SMatthew Wilcox {
10476f070b4SMatthew Wilcox 	return parent ? slot - parent->slots : 0;
105db050f29SMatthew Wilcox }
106db050f29SMatthew Wilcox 
/*
 * Step one level down from @parent towards @index: store the child entry
 * (internal pointer, data entry or NULL) in *@nodep and return the slot
 * offset that was followed.
 */
static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
			struct radix_tree_node **nodep, unsigned long index)
{
	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

	*nodep = (void *)entry;
	return offset;
}
116db050f29SMatthew Wilcox 
11735534c86SMatthew Wilcox static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
118612d6c19SNick Piggin {
119f8d5d0ccSMatthew Wilcox 	return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
120612d6c19SNick Piggin }
121612d6c19SNick Piggin 
122643b52b9SNick Piggin static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
123643b52b9SNick Piggin 		int offset)
124643b52b9SNick Piggin {
125643b52b9SNick Piggin 	__set_bit(offset, node->tags[tag]);
126643b52b9SNick Piggin }
127643b52b9SNick Piggin 
128643b52b9SNick Piggin static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
129643b52b9SNick Piggin 		int offset)
130643b52b9SNick Piggin {
131643b52b9SNick Piggin 	__clear_bit(offset, node->tags[tag]);
132643b52b9SNick Piggin }
133643b52b9SNick Piggin 
13435534c86SMatthew Wilcox static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
135643b52b9SNick Piggin 		int offset)
136643b52b9SNick Piggin {
137643b52b9SNick Piggin 	return test_bit(offset, node->tags[tag]);
138643b52b9SNick Piggin }
139643b52b9SNick Piggin 
14035534c86SMatthew Wilcox static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
141643b52b9SNick Piggin {
142f8d5d0ccSMatthew Wilcox 	root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
143643b52b9SNick Piggin }
144643b52b9SNick Piggin 
1452fcd9005SMatthew Wilcox static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
146643b52b9SNick Piggin {
147f8d5d0ccSMatthew Wilcox 	root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
148643b52b9SNick Piggin }
149643b52b9SNick Piggin 
/* Clear every root tag, preserving the gfp/flag bits below ROOT_TAG_SHIFT. */
static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
}
154643b52b9SNick Piggin 
15535534c86SMatthew Wilcox static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
156643b52b9SNick Piggin {
157f8d5d0ccSMatthew Wilcox 	return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
158643b52b9SNick Piggin }
159643b52b9SNick Piggin 
16035534c86SMatthew Wilcox static inline unsigned root_tags_get(const struct radix_tree_root *root)
1617b60e9adSMatthew Wilcox {
162f8d5d0ccSMatthew Wilcox 	return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
1630a835c4fSMatthew Wilcox }
1640a835c4fSMatthew Wilcox 
1650a835c4fSMatthew Wilcox static inline bool is_idr(const struct radix_tree_root *root)
1660a835c4fSMatthew Wilcox {
167f8d5d0ccSMatthew Wilcox 	return !!(root->xa_flags & ROOT_IS_IDR);
1687b60e9adSMatthew Wilcox }
1697b60e9adSMatthew Wilcox 
170643b52b9SNick Piggin /*
171643b52b9SNick Piggin  * Returns 1 if any slot in the node has this tag set.
172643b52b9SNick Piggin  * Otherwise returns 0.
173643b52b9SNick Piggin  */
17435534c86SMatthew Wilcox static inline int any_tag_set(const struct radix_tree_node *node,
17535534c86SMatthew Wilcox 							unsigned int tag)
176643b52b9SNick Piggin {
1772fcd9005SMatthew Wilcox 	unsigned idx;
178643b52b9SNick Piggin 	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
179643b52b9SNick Piggin 		if (node->tags[tag][idx])
180643b52b9SNick Piggin 			return 1;
181643b52b9SNick Piggin 	}
182643b52b9SNick Piggin 	return 0;
183643b52b9SNick Piggin }
18478c1d784SKonstantin Khlebnikov 
/* Set @tag for every slot in @node. */
static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
}
1890a835c4fSMatthew Wilcox 
/**
 * radix_tree_find_next_bit - find the next set bit in a node's tag bitmap
 *
 * @node: The node whose tag bitmap to search
 * @tag: Which tag's bitmap to search
 * @offset: The bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from RADIX_TREE_MAP_SIZE up to
 * roundup(RADIX_TREE_MAP_SIZE, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or RADIX_TREE_MAP_SIZE if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
			 unsigned long offset)
{
	const unsigned long *addr = node->tags[tag];

	if (offset < RADIX_TREE_MAP_SIZE) {
		unsigned long tmp;

		/* Start in the word containing @offset, discarding lower bits */
		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		/* Advance to the next word boundary, then scan whole words */
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < RADIX_TREE_MAP_SIZE) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return RADIX_TREE_MAP_SIZE;
}
22478c1d784SKonstantin Khlebnikov 
225268f42deSMatthew Wilcox static unsigned int iter_offset(const struct radix_tree_iter *iter)
226268f42deSMatthew Wilcox {
2273a08cd52SMatthew Wilcox 	return iter->index & RADIX_TREE_MAP_MASK;
228268f42deSMatthew Wilcox }
229268f42deSMatthew Wilcox 
230218ed750SMatthew Wilcox /*
231218ed750SMatthew Wilcox  * The maximum index which can be stored in a radix tree
232218ed750SMatthew Wilcox  */
233218ed750SMatthew Wilcox static inline unsigned long shift_maxindex(unsigned int shift)
234218ed750SMatthew Wilcox {
235218ed750SMatthew Wilcox 	return (RADIX_TREE_MAP_SIZE << shift) - 1;
236218ed750SMatthew Wilcox }
237218ed750SMatthew Wilcox 
/* The maximum index reachable through @node's subtree. */
static inline unsigned long node_maxindex(const struct radix_tree_node *node)
{
	return shift_maxindex(node->shift);
}
242218ed750SMatthew Wilcox 
2430a835c4fSMatthew Wilcox static unsigned long next_index(unsigned long index,
2440a835c4fSMatthew Wilcox 				const struct radix_tree_node *node,
2450a835c4fSMatthew Wilcox 				unsigned long offset)
2460a835c4fSMatthew Wilcox {
2470a835c4fSMatthew Wilcox 	return (index & ~node_maxindex(node)) + (offset << node->shift);
2480a835c4fSMatthew Wilcox }
2490a835c4fSMatthew Wilcox 
/*
 * Allocate a radix tree node, preferring the per-cpu preload pool when
 * the allocation may not block.  The new node's bookkeeping fields are
 * initialised from the arguments before it is returned.
 *
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
			struct radix_tree_root *root,
			unsigned int shift, unsigned int offset,
			unsigned int count, unsigned int nr_values)
{
	struct radix_tree_node *ret = NULL;

	/*
	 * Preload code isn't irq safe and it doesn't make sense to use
	 * preloading during an interrupt anyway as all the allocations have
	 * to be atomic. So just do normal allocation when in interrupt.
	 */
	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
		struct radix_tree_preload *rtp;

		/*
		 * Even if the caller has preloaded, try to allocate from the
		 * cache first for the new node to get accounted to the memory
		 * cgroup.
		 */
		ret = kmem_cache_alloc(radix_tree_node_cachep,
				       gfp_mask | __GFP_NOWARN);
		if (ret)
			goto out;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr) {
			/* Pop a node off the per-cpu list (chained via ->parent) */
			ret = rtp->nodes;
			rtp->nodes = ret->parent;
			rtp->nr--;
		}
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(ret);
		goto out;
	}
	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
	BUG_ON(radix_tree_is_internal_node(ret));
	if (ret) {
		ret->shift = shift;
		ret->offset = offset;
		ret->count = count;
		ret->nr_values = nr_values;
		ret->parent = parent;
		ret->array = root;
	}
	return ret;
}
3111da177e4SLinus Torvalds 
/* RCU callback: scrub a node's slots and tags, then return it to the slab. */
void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);

	/*
	 * Must only free zeroed nodes into the slab.  We can be left with
	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
	 * and tags here.
	 */
	memset(node->slots, 0, sizeof(node->slots));
	memset(node->tags, 0, sizeof(node->tags));
	INIT_LIST_HEAD(&node->private_list);

	kmem_cache_free(radix_tree_node_cachep, node);
}
3287cf9c2c7SNick Piggin 
/*
 * Defer freeing @node until after an RCU grace period, so that lockless
 * readers still traversing it remain safe.
 */
static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}
3341da177e4SLinus Torvalds 
/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	/*
	 * Nodes preloaded by one cgroup can be used by another cgroup, so
	 * they should never be accounted to any particular memory cgroup.
	 */
	gfp_mask &= ~__GFP_ACCOUNT;

	preempt_disable();
	rtp = this_cpu_ptr(&radix_tree_preloads);
	while (rtp->nr < nr) {
		/*
		 * The allocation may sleep, so drop preemption around it and
		 * re-fetch the per-cpu pointer afterwards: we may have
		 * migrated to a different CPU, or the pool may have been
		 * refilled by someone else in the meantime.
		 */
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr < nr) {
			/* Chain the node onto the per-cpu list via ->parent */
			node->parent = rtp->nodes;
			rtp->nodes = node;
			rtp->nr++;
		} else {
			kmem_cache_free(radix_tree_node_cachep, node);
		}
	}
	ret = 0;
out:
	return ret;
}
3775e4c0d97SJan Kara 
/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	/* Warn on non-sensical use: preloading requires a mask that can block */
	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);
3941da177e4SLinus Torvalds 
3956e954b9eSNick Piggin /*
3965e4c0d97SJan Kara  * The same as above function, except we don't guarantee preloading happens.
3975e4c0d97SJan Kara  * We do it, if we decide it helps. On success, return zero with preemption
3985e4c0d97SJan Kara  * disabled. On error, return -ENOMEM with preemption not disabled.
3995e4c0d97SJan Kara  */
4005e4c0d97SJan Kara int radix_tree_maybe_preload(gfp_t gfp_mask)
4015e4c0d97SJan Kara {
402d0164adcSMel Gorman 	if (gfpflags_allow_blocking(gfp_mask))
403c78c66d1SKirill A. Shutemov 		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
4045e4c0d97SJan Kara 	/* Preloading doesn't help anything with this gfp mask, skip it */
4055e4c0d97SJan Kara 	preempt_disable();
4065e4c0d97SJan Kara 	return 0;
4075e4c0d97SJan Kara }
4085e4c0d97SJan Kara EXPORT_SYMBOL(radix_tree_maybe_preload);
4095e4c0d97SJan Kara 
/*
 * Read the root of the tree: set *@nodep to the head entry (which may be an
 * internal pointer, a data entry or NULL) and *@maxindex to the largest
 * index the current tree can hold.  Returns the shift needed to descend
 * from the root, or 0 for an empty or single-entry tree.
 */
static unsigned radix_tree_load_root(const struct radix_tree_root *root,
		struct radix_tree_node **nodep, unsigned long *maxindex)
{
	struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);

	*nodep = node;

	if (likely(radix_tree_is_internal_node(node))) {
		node = entry_to_node(node);
		*maxindex = node_maxindex(node);
		return node->shift + RADIX_TREE_MAP_SHIFT;
	}

	*maxindex = 0;
	return 0;
}
4261456a439SMatthew Wilcox 
/*
 *	Extend a radix tree so it can store key @index.
 *
 *	Grows one level at a time, pushing the old head down as slot 0 of
 *	each new root node.  Returns the shift of the new root level, or
 *	-ENOMEM on allocation failure.
 */
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
				unsigned long index, unsigned int shift)
{
	void *entry;
	unsigned int maxshift;
	int tag;

	/* Figure out what the shift should be.  */
	maxshift = shift;
	while (index > shift_maxindex(maxshift))
		maxshift += RADIX_TREE_MAP_SHIFT;

	/* An empty tree (modulo the IDR_FREE tag) needs no new levels */
	entry = rcu_dereference_raw(root->xa_head);
	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
		goto out;

	do {
		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
							root, shift, 0, 1, 0);
		if (!node)
			return -ENOMEM;

		if (is_idr(root)) {
			all_tag_set(node, IDR_FREE);
			if (!root_tag_get(root, IDR_FREE)) {
				tag_clear(node, IDR_FREE, 0);
				root_tag_set(root, IDR_FREE);
			}
		} else {
			/* Propagate the aggregated tag info to the new child */
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
				if (root_tag_get(root, tag))
					tag_set(node, tag, 0);
			}
		}

		BUG_ON(shift > BITS_PER_LONG);
		if (radix_tree_is_internal_node(entry)) {
			entry_to_node(entry)->parent = node;
		} else if (xa_is_value(entry)) {
			/* Moving a value entry root->xa_head to a node */
			node->nr_values = 1;
		}
		/*
		 * entry was already in the radix tree, so we do not need
		 * rcu_assign_pointer here
		 */
		node->slots[0] = (void __rcu *)entry;
		entry = node_to_entry(node);
		rcu_assign_pointer(root->xa_head, entry);
		shift += RADIX_TREE_MAP_SHIFT;
	} while (shift <= maxshift);
out:
	return maxshift + RADIX_TREE_MAP_SHIFT;
}
4851da177e4SLinus Torvalds 
4861da177e4SLinus Torvalds /**
487f4b109c6SJohannes Weiner  *	radix_tree_shrink    -    shrink radix tree to minimum height
488f4b109c6SJohannes Weiner  *	@root		radix tree root
489f4b109c6SJohannes Weiner  */
4901cf56f9dSMatthew Wilcox static inline bool radix_tree_shrink(struct radix_tree_root *root)
491f4b109c6SJohannes Weiner {
4920ac398efSMatthew Wilcox 	bool shrunk = false;
4930ac398efSMatthew Wilcox 
494f4b109c6SJohannes Weiner 	for (;;) {
495f8d5d0ccSMatthew Wilcox 		struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
496f4b109c6SJohannes Weiner 		struct radix_tree_node *child;
497f4b109c6SJohannes Weiner 
498f4b109c6SJohannes Weiner 		if (!radix_tree_is_internal_node(node))
499f4b109c6SJohannes Weiner 			break;
500f4b109c6SJohannes Weiner 		node = entry_to_node(node);
501f4b109c6SJohannes Weiner 
502f4b109c6SJohannes Weiner 		/*
503f4b109c6SJohannes Weiner 		 * The candidate node has more than one child, or its child
5043a08cd52SMatthew Wilcox 		 * is not at the leftmost slot, we cannot shrink.
505f4b109c6SJohannes Weiner 		 */
506f4b109c6SJohannes Weiner 		if (node->count != 1)
507f4b109c6SJohannes Weiner 			break;
50812320d0fSMatthew Wilcox 		child = rcu_dereference_raw(node->slots[0]);
509f4b109c6SJohannes Weiner 		if (!child)
510f4b109c6SJohannes Weiner 			break;
511f4b109c6SJohannes Weiner 
51266ee620fSMatthew Wilcox 		/*
51366ee620fSMatthew Wilcox 		 * For an IDR, we must not shrink entry 0 into the root in
51466ee620fSMatthew Wilcox 		 * case somebody calls idr_replace() with a pointer that
51566ee620fSMatthew Wilcox 		 * appears to be an internal entry
51666ee620fSMatthew Wilcox 		 */
51766ee620fSMatthew Wilcox 		if (!node->shift && is_idr(root))
51866ee620fSMatthew Wilcox 			break;
51966ee620fSMatthew Wilcox 
520f4b109c6SJohannes Weiner 		if (radix_tree_is_internal_node(child))
521f4b109c6SJohannes Weiner 			entry_to_node(child)->parent = NULL;
522f4b109c6SJohannes Weiner 
523f4b109c6SJohannes Weiner 		/*
524f4b109c6SJohannes Weiner 		 * We don't need rcu_assign_pointer(), since we are simply
525f4b109c6SJohannes Weiner 		 * moving the node from one part of the tree to another: if it
526f4b109c6SJohannes Weiner 		 * was safe to dereference the old pointer to it
527f4b109c6SJohannes Weiner 		 * (node->slots[0]), it will be safe to dereference the new
528f8d5d0ccSMatthew Wilcox 		 * one (root->xa_head) as far as dependent read barriers go.
529f4b109c6SJohannes Weiner 		 */
530f8d5d0ccSMatthew Wilcox 		root->xa_head = (void __rcu *)child;
5310a835c4fSMatthew Wilcox 		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
5320a835c4fSMatthew Wilcox 			root_tag_clear(root, IDR_FREE);
533f4b109c6SJohannes Weiner 
534f4b109c6SJohannes Weiner 		/*
535f4b109c6SJohannes Weiner 		 * We have a dilemma here. The node's slot[0] must not be
536f4b109c6SJohannes Weiner 		 * NULLed in case there are concurrent lookups expecting to
537f4b109c6SJohannes Weiner 		 * find the item. However if this was a bottom-level node,
538f4b109c6SJohannes Weiner 		 * then it may be subject to the slot pointer being visible
539f4b109c6SJohannes Weiner 		 * to callers dereferencing it. If item corresponding to
540f4b109c6SJohannes Weiner 		 * slot[0] is subsequently deleted, these callers would expect
541f4b109c6SJohannes Weiner 		 * their slot to become empty sooner or later.
542f4b109c6SJohannes Weiner 		 *
543f4b109c6SJohannes Weiner 		 * For example, lockless pagecache will look up a slot, deref
544f4b109c6SJohannes Weiner 		 * the page pointer, and if the page has 0 refcount it means it
545f4b109c6SJohannes Weiner 		 * was concurrently deleted from pagecache so try the deref
546f4b109c6SJohannes Weiner 		 * again. Fortunately there is already a requirement for logic
547f4b109c6SJohannes Weiner 		 * to retry the entire slot lookup -- the indirect pointer
548f4b109c6SJohannes Weiner 		 * problem (replacing direct root node with an indirect pointer
549f4b109c6SJohannes Weiner 		 * also results in a stale slot). So tag the slot as indirect
550f4b109c6SJohannes Weiner 		 * to force callers to retry.
551f4b109c6SJohannes Weiner 		 */
5524d693d08SJohannes Weiner 		node->count = 0;
5534d693d08SJohannes Weiner 		if (!radix_tree_is_internal_node(child)) {
554d7b62727SMatthew Wilcox 			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
5554d693d08SJohannes Weiner 		}
556f4b109c6SJohannes Weiner 
557ea07b862SJohannes Weiner 		WARN_ON_ONCE(!list_empty(&node->private_list));
558f4b109c6SJohannes Weiner 		radix_tree_node_free(node);
5590ac398efSMatthew Wilcox 		shrunk = true;
560f4b109c6SJohannes Weiner 	}
561f4b109c6SJohannes Weiner 
5620ac398efSMatthew Wilcox 	return shrunk;
5630ac398efSMatthew Wilcox }
5640ac398efSMatthew Wilcox 
/*
 * Free @node if it is empty, then walk up freeing every ancestor that
 * becomes empty as a result; shrink the tree if the surviving node is the
 * root.  Returns true if any node was freed or the tree was shrunk.
 */
static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			/* Still-populated root node: try to reduce tree height */
			if (node_to_entry(node) ==
					rcu_dereference_raw(root->xa_head))
				deleted |= radix_tree_shrink(root);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			/*
			 * Shouldn't the tags already have all been cleared
			 * by the caller?
			 */
			if (!is_idr(root))
				root_tag_clear_all(root);
			root->xa_head = NULL;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
}
603f4b109c6SJohannes Weiner 
604f4b109c6SJohannes Weiner /**
605139e5616SJohannes Weiner  *	__radix_tree_create	-	create a slot in a radix tree
6061da177e4SLinus Torvalds  *	@root:		radix tree root
6071da177e4SLinus Torvalds  *	@index:		index key
608139e5616SJohannes Weiner  *	@nodep:		returns node
609139e5616SJohannes Weiner  *	@slotp:		returns slot
6101da177e4SLinus Torvalds  *
611139e5616SJohannes Weiner  *	Create, if necessary, and return the node and slot for an item
612139e5616SJohannes Weiner  *	at position @index in the radix tree @root.
613139e5616SJohannes Weiner  *
614139e5616SJohannes Weiner  *	Until there is more than one item in the tree, no nodes are
615f8d5d0ccSMatthew Wilcox  *	allocated and @root->xa_head is used as a direct slot instead of
616139e5616SJohannes Weiner  *	pointing to a node, in which case *@nodep will be NULL.
617139e5616SJohannes Weiner  *
618139e5616SJohannes Weiner  *	Returns -ENOMEM, or 0 for success.
6191da177e4SLinus Torvalds  */
62074d60958SMatthew Wilcox static int __radix_tree_create(struct radix_tree_root *root,
6213a08cd52SMatthew Wilcox 		unsigned long index, struct radix_tree_node **nodep,
6223a08cd52SMatthew Wilcox 		void __rcu ***slotp)
6231da177e4SLinus Torvalds {
62489148aa4SMatthew Wilcox 	struct radix_tree_node *node = NULL, *child;
625f8d5d0ccSMatthew Wilcox 	void __rcu **slot = (void __rcu **)&root->xa_head;
62649ea6ebcSMatthew Wilcox 	unsigned long maxindex;
62789148aa4SMatthew Wilcox 	unsigned int shift, offset = 0;
6283a08cd52SMatthew Wilcox 	unsigned long max = index;
6290a835c4fSMatthew Wilcox 	gfp_t gfp = root_gfp_mask(root);
63049ea6ebcSMatthew Wilcox 
63189148aa4SMatthew Wilcox 	shift = radix_tree_load_root(root, &child, &maxindex);
6321da177e4SLinus Torvalds 
6331da177e4SLinus Torvalds 	/* Make sure the tree is high enough.  */
63449ea6ebcSMatthew Wilcox 	if (max > maxindex) {
6350a835c4fSMatthew Wilcox 		int error = radix_tree_extend(root, gfp, max, shift);
63649ea6ebcSMatthew Wilcox 		if (error < 0)
6371da177e4SLinus Torvalds 			return error;
63849ea6ebcSMatthew Wilcox 		shift = error;
639f8d5d0ccSMatthew Wilcox 		child = rcu_dereference_raw(root->xa_head);
6401da177e4SLinus Torvalds 	}
6411da177e4SLinus Torvalds 
6423a08cd52SMatthew Wilcox 	while (shift > 0) {
643c12e51b0SMatthew Wilcox 		shift -= RADIX_TREE_MAP_SHIFT;
64489148aa4SMatthew Wilcox 		if (child == NULL) {
6451da177e4SLinus Torvalds 			/* Have to add a child node.  */
646d58275bcSMatthew Wilcox 			child = radix_tree_node_alloc(gfp, node, root, shift,
647e8de4340SMatthew Wilcox 							offset, 0, 0);
64889148aa4SMatthew Wilcox 			if (!child)
6491da177e4SLinus Torvalds 				return -ENOMEM;
65089148aa4SMatthew Wilcox 			rcu_assign_pointer(*slot, node_to_entry(child));
65189148aa4SMatthew Wilcox 			if (node)
6521da177e4SLinus Torvalds 				node->count++;
65389148aa4SMatthew Wilcox 		} else if (!radix_tree_is_internal_node(child))
654e6145236SMatthew Wilcox 			break;
6551da177e4SLinus Torvalds 
6561da177e4SLinus Torvalds 		/* Go a level down */
65789148aa4SMatthew Wilcox 		node = entry_to_node(child);
6589e85d811SMatthew Wilcox 		offset = radix_tree_descend(node, &child, index);
65989148aa4SMatthew Wilcox 		slot = &node->slots[offset];
660e6145236SMatthew Wilcox 	}
661e6145236SMatthew Wilcox 
662139e5616SJohannes Weiner 	if (nodep)
663139e5616SJohannes Weiner 		*nodep = node;
664139e5616SJohannes Weiner 	if (slotp)
66589148aa4SMatthew Wilcox 		*slotp = slot;
666139e5616SJohannes Weiner 	return 0;
667139e5616SJohannes Weiner }
668139e5616SJohannes Weiner 
669175542f5SMatthew Wilcox /*
670175542f5SMatthew Wilcox  * Free any nodes below this node.  The tree is presumed to not need
671175542f5SMatthew Wilcox  * shrinking, and any user data in the tree is presumed to not need a
672175542f5SMatthew Wilcox  * destructor called on it.  If we need to add a destructor, we can
673175542f5SMatthew Wilcox  * add that functionality later.  Note that we may not clear tags or
674175542f5SMatthew Wilcox  * slots from the tree as an RCU walker may still have a pointer into
675175542f5SMatthew Wilcox  * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
676175542f5SMatthew Wilcox  * but we'll still have to clear those in rcu_free.
677175542f5SMatthew Wilcox  */
678175542f5SMatthew Wilcox static void radix_tree_free_nodes(struct radix_tree_node *node)
679175542f5SMatthew Wilcox {
680175542f5SMatthew Wilcox 	unsigned offset = 0;
681175542f5SMatthew Wilcox 	struct radix_tree_node *child = entry_to_node(node);
682175542f5SMatthew Wilcox 
683175542f5SMatthew Wilcox 	for (;;) {
68412320d0fSMatthew Wilcox 		void *entry = rcu_dereference_raw(child->slots[offset]);
68502c02bf1SMatthew Wilcox 		if (xa_is_node(entry) && child->shift) {
686175542f5SMatthew Wilcox 			child = entry_to_node(entry);
687175542f5SMatthew Wilcox 			offset = 0;
688175542f5SMatthew Wilcox 			continue;
689175542f5SMatthew Wilcox 		}
690175542f5SMatthew Wilcox 		offset++;
691175542f5SMatthew Wilcox 		while (offset == RADIX_TREE_MAP_SIZE) {
692175542f5SMatthew Wilcox 			struct radix_tree_node *old = child;
693175542f5SMatthew Wilcox 			offset = child->offset + 1;
694175542f5SMatthew Wilcox 			child = child->parent;
695dd040b6fSMatthew Wilcox 			WARN_ON_ONCE(!list_empty(&old->private_list));
696175542f5SMatthew Wilcox 			radix_tree_node_free(old);
697175542f5SMatthew Wilcox 			if (old == entry_to_node(node))
698175542f5SMatthew Wilcox 				return;
699175542f5SMatthew Wilcox 		}
700175542f5SMatthew Wilcox 	}
701175542f5SMatthew Wilcox }
702175542f5SMatthew Wilcox 
703d7b62727SMatthew Wilcox static inline int insert_entries(struct radix_tree_node *node,
7043a08cd52SMatthew Wilcox 		void __rcu **slot, void *item, bool replace)
705175542f5SMatthew Wilcox {
706175542f5SMatthew Wilcox 	if (*slot)
707175542f5SMatthew Wilcox 		return -EEXIST;
708175542f5SMatthew Wilcox 	rcu_assign_pointer(*slot, item);
709175542f5SMatthew Wilcox 	if (node) {
710175542f5SMatthew Wilcox 		node->count++;
7113159f943SMatthew Wilcox 		if (xa_is_value(item))
71201959dfeSMatthew Wilcox 			node->nr_values++;
713175542f5SMatthew Wilcox 	}
714175542f5SMatthew Wilcox 	return 1;
715175542f5SMatthew Wilcox }
716175542f5SMatthew Wilcox 
717139e5616SJohannes Weiner /**
718e6145236SMatthew Wilcox  *	__radix_tree_insert    -    insert into a radix tree
719139e5616SJohannes Weiner  *	@root:		radix tree root
720139e5616SJohannes Weiner  *	@index:		index key
721139e5616SJohannes Weiner  *	@item:		item to insert
722139e5616SJohannes Weiner  *
723139e5616SJohannes Weiner  *	Insert an item into the radix tree at position @index.
724139e5616SJohannes Weiner  */
7253a08cd52SMatthew Wilcox int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
7263a08cd52SMatthew Wilcox 			void *item)
727139e5616SJohannes Weiner {
728139e5616SJohannes Weiner 	struct radix_tree_node *node;
729d7b62727SMatthew Wilcox 	void __rcu **slot;
730139e5616SJohannes Weiner 	int error;
731139e5616SJohannes Weiner 
732b194d16cSMatthew Wilcox 	BUG_ON(radix_tree_is_internal_node(item));
733139e5616SJohannes Weiner 
7343a08cd52SMatthew Wilcox 	error = __radix_tree_create(root, index, &node, &slot);
735139e5616SJohannes Weiner 	if (error)
736139e5616SJohannes Weiner 		return error;
737175542f5SMatthew Wilcox 
7383a08cd52SMatthew Wilcox 	error = insert_entries(node, slot, item, false);
739175542f5SMatthew Wilcox 	if (error < 0)
740175542f5SMatthew Wilcox 		return error;
741201b6264SChristoph Lameter 
742612d6c19SNick Piggin 	if (node) {
7437b60e9adSMatthew Wilcox 		unsigned offset = get_slot_offset(node, slot);
7447b60e9adSMatthew Wilcox 		BUG_ON(tag_get(node, 0, offset));
7457b60e9adSMatthew Wilcox 		BUG_ON(tag_get(node, 1, offset));
7467b60e9adSMatthew Wilcox 		BUG_ON(tag_get(node, 2, offset));
747612d6c19SNick Piggin 	} else {
7487b60e9adSMatthew Wilcox 		BUG_ON(root_tags_get(root));
749612d6c19SNick Piggin 	}
7501da177e4SLinus Torvalds 
7511da177e4SLinus Torvalds 	return 0;
7521da177e4SLinus Torvalds }
7533a08cd52SMatthew Wilcox EXPORT_SYMBOL(radix_tree_insert);
7541da177e4SLinus Torvalds 
755139e5616SJohannes Weiner /**
756139e5616SJohannes Weiner  *	__radix_tree_lookup	-	lookup an item in a radix tree
757139e5616SJohannes Weiner  *	@root:		radix tree root
758139e5616SJohannes Weiner  *	@index:		index key
759139e5616SJohannes Weiner  *	@nodep:		returns node
760139e5616SJohannes Weiner  *	@slotp:		returns slot
761139e5616SJohannes Weiner  *
762139e5616SJohannes Weiner  *	Lookup and return the item at position @index in the radix
763139e5616SJohannes Weiner  *	tree @root.
764139e5616SJohannes Weiner  *
765139e5616SJohannes Weiner  *	Until there is more than one item in the tree, no nodes are
766f8d5d0ccSMatthew Wilcox  *	allocated and @root->xa_head is used as a direct slot instead of
767139e5616SJohannes Weiner  *	pointing to a node, in which case *@nodep will be NULL.
768a4331366SHans Reiser  */
76935534c86SMatthew Wilcox void *__radix_tree_lookup(const struct radix_tree_root *root,
77035534c86SMatthew Wilcox 			  unsigned long index, struct radix_tree_node **nodep,
771d7b62727SMatthew Wilcox 			  void __rcu ***slotp)
772a4331366SHans Reiser {
773139e5616SJohannes Weiner 	struct radix_tree_node *node, *parent;
77485829954SMatthew Wilcox 	unsigned long maxindex;
775d7b62727SMatthew Wilcox 	void __rcu **slot;
7767cf9c2c7SNick Piggin 
77785829954SMatthew Wilcox  restart:
77885829954SMatthew Wilcox 	parent = NULL;
779f8d5d0ccSMatthew Wilcox 	slot = (void __rcu **)&root->xa_head;
7809e85d811SMatthew Wilcox 	radix_tree_load_root(root, &node, &maxindex);
78185829954SMatthew Wilcox 	if (index > maxindex)
7827cf9c2c7SNick Piggin 		return NULL;
7837cf9c2c7SNick Piggin 
784b194d16cSMatthew Wilcox 	while (radix_tree_is_internal_node(node)) {
78585829954SMatthew Wilcox 		unsigned offset;
786139e5616SJohannes Weiner 
7874dd6c098SMatthew Wilcox 		parent = entry_to_node(node);
7889e85d811SMatthew Wilcox 		offset = radix_tree_descend(parent, &node, index);
78985829954SMatthew Wilcox 		slot = parent->slots + offset;
790*eff3860bSMatthew Wilcox 		if (node == RADIX_TREE_RETRY)
791*eff3860bSMatthew Wilcox 			goto restart;
79266ee620fSMatthew Wilcox 		if (parent->shift == 0)
79366ee620fSMatthew Wilcox 			break;
79485829954SMatthew Wilcox 	}
7957cf9c2c7SNick Piggin 
796139e5616SJohannes Weiner 	if (nodep)
797139e5616SJohannes Weiner 		*nodep = parent;
798139e5616SJohannes Weiner 	if (slotp)
799139e5616SJohannes Weiner 		*slotp = slot;
800139e5616SJohannes Weiner 	return node;
801b72b71c6SHuang Shijie }
802b72b71c6SHuang Shijie 
803b72b71c6SHuang Shijie /**
804b72b71c6SHuang Shijie  *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
805b72b71c6SHuang Shijie  *	@root:		radix tree root
806b72b71c6SHuang Shijie  *	@index:		index key
807b72b71c6SHuang Shijie  *
808b72b71c6SHuang Shijie  *	Returns:  the slot corresponding to the position @index in the
809b72b71c6SHuang Shijie  *	radix tree @root. This is useful for update-if-exists operations.
810b72b71c6SHuang Shijie  *
811b72b71c6SHuang Shijie  *	This function can be called under rcu_read_lock iff the slot is not
812b72b71c6SHuang Shijie  *	modified by radix_tree_replace_slot, otherwise it must be called
813b72b71c6SHuang Shijie  *	exclusive from other writers. Any dereference of the slot must be done
814b72b71c6SHuang Shijie  *	using radix_tree_deref_slot.
815b72b71c6SHuang Shijie  */
816d7b62727SMatthew Wilcox void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
81735534c86SMatthew Wilcox 				unsigned long index)
818b72b71c6SHuang Shijie {
819d7b62727SMatthew Wilcox 	void __rcu **slot;
820139e5616SJohannes Weiner 
821139e5616SJohannes Weiner 	if (!__radix_tree_lookup(root, index, NULL, &slot))
822139e5616SJohannes Weiner 		return NULL;
823139e5616SJohannes Weiner 	return slot;
824a4331366SHans Reiser }
825a4331366SHans Reiser EXPORT_SYMBOL(radix_tree_lookup_slot);
826a4331366SHans Reiser 
8271da177e4SLinus Torvalds /**
8281da177e4SLinus Torvalds  *	radix_tree_lookup    -    perform lookup operation on a radix tree
8291da177e4SLinus Torvalds  *	@root:		radix tree root
8301da177e4SLinus Torvalds  *	@index:		index key
8311da177e4SLinus Torvalds  *
8321da177e4SLinus Torvalds  *	Lookup the item at the position @index in the radix tree @root.
8337cf9c2c7SNick Piggin  *
8347cf9c2c7SNick Piggin  *	This function can be called under rcu_read_lock, however the caller
8357cf9c2c7SNick Piggin  *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
8367cf9c2c7SNick Piggin  *	them safely). No RCU barriers are required to access or modify the
8377cf9c2c7SNick Piggin  *	returned item, however.
8381da177e4SLinus Torvalds  */
83935534c86SMatthew Wilcox void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
8401da177e4SLinus Torvalds {
841139e5616SJohannes Weiner 	return __radix_tree_lookup(root, index, NULL, NULL);
8421da177e4SLinus Torvalds }
8431da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_lookup);
8441da177e4SLinus Torvalds 
845d7b62727SMatthew Wilcox static void replace_slot(void __rcu **slot, void *item,
84601959dfeSMatthew Wilcox 		struct radix_tree_node *node, int count, int values)
8476d75f366SJohannes Weiner {
84801959dfeSMatthew Wilcox 	if (node && (count || values)) {
849f4b109c6SJohannes Weiner 		node->count += count;
85001959dfeSMatthew Wilcox 		node->nr_values += values;
851a90eb3a2SMatthew Wilcox 	}
8526d75f366SJohannes Weiner 
8536d75f366SJohannes Weiner 	rcu_assign_pointer(*slot, item);
8546d75f366SJohannes Weiner }
8556d75f366SJohannes Weiner 
8560a835c4fSMatthew Wilcox static bool node_tag_get(const struct radix_tree_root *root,
8570a835c4fSMatthew Wilcox 				const struct radix_tree_node *node,
8580a835c4fSMatthew Wilcox 				unsigned int tag, unsigned int offset)
859a90eb3a2SMatthew Wilcox {
8600a835c4fSMatthew Wilcox 	if (node)
8610a835c4fSMatthew Wilcox 		return tag_get(node, tag, offset);
8620a835c4fSMatthew Wilcox 	return root_tag_get(root, tag);
863a90eb3a2SMatthew Wilcox }
8640a835c4fSMatthew Wilcox 
8650a835c4fSMatthew Wilcox /*
8660a835c4fSMatthew Wilcox  * IDR users want to be able to store NULL in the tree, so if the slot isn't
8670a835c4fSMatthew Wilcox  * free, don't adjust the count, even if it's transitioning between NULL and
8680a835c4fSMatthew Wilcox  * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
8690a835c4fSMatthew Wilcox  * have empty bits, but it only stores NULL in slots when they're being
8700a835c4fSMatthew Wilcox  * deleted.
8710a835c4fSMatthew Wilcox  */
8720a835c4fSMatthew Wilcox static int calculate_count(struct radix_tree_root *root,
873d7b62727SMatthew Wilcox 				struct radix_tree_node *node, void __rcu **slot,
8740a835c4fSMatthew Wilcox 				void *item, void *old)
8750a835c4fSMatthew Wilcox {
8760a835c4fSMatthew Wilcox 	if (is_idr(root)) {
8770a835c4fSMatthew Wilcox 		unsigned offset = get_slot_offset(node, slot);
8780a835c4fSMatthew Wilcox 		bool free = node_tag_get(root, node, IDR_FREE, offset);
8790a835c4fSMatthew Wilcox 		if (!free)
8800a835c4fSMatthew Wilcox 			return 0;
8810a835c4fSMatthew Wilcox 		if (!old)
8820a835c4fSMatthew Wilcox 			return 1;
8830a835c4fSMatthew Wilcox 	}
8840a835c4fSMatthew Wilcox 	return !!item - !!old;
885a90eb3a2SMatthew Wilcox }
886a90eb3a2SMatthew Wilcox 
8871da177e4SLinus Torvalds /**
888f7942430SJohannes Weiner  * __radix_tree_replace		- replace item in a slot
889f7942430SJohannes Weiner  * @root:		radix tree root
890f7942430SJohannes Weiner  * @node:		pointer to tree node
891f7942430SJohannes Weiner  * @slot:		pointer to slot in @node
892f7942430SJohannes Weiner  * @item:		new item to store in the slot.
893f7942430SJohannes Weiner  *
894f7942430SJohannes Weiner  * For use with __radix_tree_lookup().  Caller must hold tree write locked
895f7942430SJohannes Weiner  * across slot lookup and replacement.
896f7942430SJohannes Weiner  */
897f7942430SJohannes Weiner void __radix_tree_replace(struct radix_tree_root *root,
898f7942430SJohannes Weiner 			  struct radix_tree_node *node,
8991cf56f9dSMatthew Wilcox 			  void __rcu **slot, void *item)
900f7942430SJohannes Weiner {
9010a835c4fSMatthew Wilcox 	void *old = rcu_dereference_raw(*slot);
90201959dfeSMatthew Wilcox 	int values = !!xa_is_value(item) - !!xa_is_value(old);
9030a835c4fSMatthew Wilcox 	int count = calculate_count(root, node, slot, item, old);
9040a835c4fSMatthew Wilcox 
9056d75f366SJohannes Weiner 	/*
90601959dfeSMatthew Wilcox 	 * This function supports replacing value entries and
907f4b109c6SJohannes Weiner 	 * deleting entries, but that needs accounting against the
908f8d5d0ccSMatthew Wilcox 	 * node unless the slot is root->xa_head.
9096d75f366SJohannes Weiner 	 */
910f8d5d0ccSMatthew Wilcox 	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
91101959dfeSMatthew Wilcox 			(count || values));
91201959dfeSMatthew Wilcox 	replace_slot(slot, item, node, count, values);
913f4b109c6SJohannes Weiner 
9144d693d08SJohannes Weiner 	if (!node)
9154d693d08SJohannes Weiner 		return;
9164d693d08SJohannes Weiner 
9171cf56f9dSMatthew Wilcox 	delete_node(root, node);
9186d75f366SJohannes Weiner }
919f7942430SJohannes Weiner 
9206d75f366SJohannes Weiner /**
9216d75f366SJohannes Weiner  * radix_tree_replace_slot	- replace item in a slot
9226d75f366SJohannes Weiner  * @root:	radix tree root
9236d75f366SJohannes Weiner  * @slot:	pointer to slot
9246d75f366SJohannes Weiner  * @item:	new item to store in the slot.
9256d75f366SJohannes Weiner  *
9267b8d046fSMatthew Wilcox  * For use with radix_tree_lookup_slot() and
9276d75f366SJohannes Weiner  * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
9286d75f366SJohannes Weiner  * across slot lookup and replacement.
9296d75f366SJohannes Weiner  *
9306d75f366SJohannes Weiner  * NOTE: This cannot be used to switch between non-entries (empty slots),
93101959dfeSMatthew Wilcox  * regular entries, and value entries, as that requires accounting
932f4b109c6SJohannes Weiner  * inside the radix tree node. When switching from one type of entry or
933e157b555SMatthew Wilcox  * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
934e157b555SMatthew Wilcox  * radix_tree_iter_replace().
9356d75f366SJohannes Weiner  */
9366d75f366SJohannes Weiner void radix_tree_replace_slot(struct radix_tree_root *root,
937d7b62727SMatthew Wilcox 			     void __rcu **slot, void *item)
9386d75f366SJohannes Weiner {
9391cf56f9dSMatthew Wilcox 	__radix_tree_replace(root, NULL, slot, item);
940f7942430SJohannes Weiner }
94110257d71SSong Liu EXPORT_SYMBOL(radix_tree_replace_slot);
942f7942430SJohannes Weiner 
943e157b555SMatthew Wilcox /**
944e157b555SMatthew Wilcox  * radix_tree_iter_replace - replace item in a slot
945e157b555SMatthew Wilcox  * @root:	radix tree root
946e157b555SMatthew Wilcox  * @slot:	pointer to slot
947e157b555SMatthew Wilcox  * @item:	new item to store in the slot.
948e157b555SMatthew Wilcox  *
9492956c664SMatthew Wilcox  * For use with radix_tree_for_each_slot().
9502956c664SMatthew Wilcox  * Caller must hold tree write locked.
951e157b555SMatthew Wilcox  */
952e157b555SMatthew Wilcox void radix_tree_iter_replace(struct radix_tree_root *root,
953d7b62727SMatthew Wilcox 				const struct radix_tree_iter *iter,
954d7b62727SMatthew Wilcox 				void __rcu **slot, void *item)
955e157b555SMatthew Wilcox {
9561cf56f9dSMatthew Wilcox 	__radix_tree_replace(root, iter->node, slot, item);
957e157b555SMatthew Wilcox }
958e157b555SMatthew Wilcox 
95930b888baSMatthew Wilcox static void node_tag_set(struct radix_tree_root *root,
96030b888baSMatthew Wilcox 				struct radix_tree_node *node,
96130b888baSMatthew Wilcox 				unsigned int tag, unsigned int offset)
96230b888baSMatthew Wilcox {
96330b888baSMatthew Wilcox 	while (node) {
96430b888baSMatthew Wilcox 		if (tag_get(node, tag, offset))
96530b888baSMatthew Wilcox 			return;
96630b888baSMatthew Wilcox 		tag_set(node, tag, offset);
96730b888baSMatthew Wilcox 		offset = node->offset;
96830b888baSMatthew Wilcox 		node = node->parent;
96930b888baSMatthew Wilcox 	}
97030b888baSMatthew Wilcox 
97130b888baSMatthew Wilcox 	if (!root_tag_get(root, tag))
97230b888baSMatthew Wilcox 		root_tag_set(root, tag);
97330b888baSMatthew Wilcox }
97430b888baSMatthew Wilcox 
9751da177e4SLinus Torvalds /**
9761da177e4SLinus Torvalds  *	radix_tree_tag_set - set a tag on a radix tree node
9771da177e4SLinus Torvalds  *	@root:		radix tree root
9781da177e4SLinus Torvalds  *	@index:		index key
9791da177e4SLinus Torvalds  *	@tag:		tag index
9801da177e4SLinus Torvalds  *
981daff89f3SJonathan Corbet  *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
982daff89f3SJonathan Corbet  *	corresponding to @index in the radix tree.  From
9831da177e4SLinus Torvalds  *	the root all the way down to the leaf node.
9841da177e4SLinus Torvalds  *
9851da177e4SLinus Torvalds  *	Returns the address of the tagged item.  Setting a tag on a not-present
9861da177e4SLinus Torvalds  *	item is a bug.
9871da177e4SLinus Torvalds  */
9881da177e4SLinus Torvalds void *radix_tree_tag_set(struct radix_tree_root *root,
989daff89f3SJonathan Corbet 			unsigned long index, unsigned int tag)
9901da177e4SLinus Torvalds {
991fb969909SRoss Zwisler 	struct radix_tree_node *node, *parent;
992fb969909SRoss Zwisler 	unsigned long maxindex;
9931da177e4SLinus Torvalds 
9949e85d811SMatthew Wilcox 	radix_tree_load_root(root, &node, &maxindex);
995fb969909SRoss Zwisler 	BUG_ON(index > maxindex);
9961da177e4SLinus Torvalds 
997b194d16cSMatthew Wilcox 	while (radix_tree_is_internal_node(node)) {
998fb969909SRoss Zwisler 		unsigned offset;
9991da177e4SLinus Torvalds 
10004dd6c098SMatthew Wilcox 		parent = entry_to_node(node);
10019e85d811SMatthew Wilcox 		offset = radix_tree_descend(parent, &node, index);
1002fb969909SRoss Zwisler 		BUG_ON(!node);
1003fb969909SRoss Zwisler 
1004fb969909SRoss Zwisler 		if (!tag_get(parent, tag, offset))
1005fb969909SRoss Zwisler 			tag_set(parent, tag, offset);
10061da177e4SLinus Torvalds 	}
10071da177e4SLinus Torvalds 
1008612d6c19SNick Piggin 	/* set the root's tag bit */
1009fb969909SRoss Zwisler 	if (!root_tag_get(root, tag))
1010612d6c19SNick Piggin 		root_tag_set(root, tag);
1011612d6c19SNick Piggin 
1012fb969909SRoss Zwisler 	return node;
10131da177e4SLinus Torvalds }
10141da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_tag_set);
10151da177e4SLinus Torvalds 
1016d604c324SMatthew Wilcox static void node_tag_clear(struct radix_tree_root *root,
1017d604c324SMatthew Wilcox 				struct radix_tree_node *node,
1018d604c324SMatthew Wilcox 				unsigned int tag, unsigned int offset)
1019d604c324SMatthew Wilcox {
1020d604c324SMatthew Wilcox 	while (node) {
1021d604c324SMatthew Wilcox 		if (!tag_get(node, tag, offset))
1022d604c324SMatthew Wilcox 			return;
1023d604c324SMatthew Wilcox 		tag_clear(node, tag, offset);
1024d604c324SMatthew Wilcox 		if (any_tag_set(node, tag))
1025d604c324SMatthew Wilcox 			return;
1026d604c324SMatthew Wilcox 
1027d604c324SMatthew Wilcox 		offset = node->offset;
1028d604c324SMatthew Wilcox 		node = node->parent;
1029d604c324SMatthew Wilcox 	}
1030d604c324SMatthew Wilcox 
1031d604c324SMatthew Wilcox 	/* clear the root's tag bit */
1032d604c324SMatthew Wilcox 	if (root_tag_get(root, tag))
1033d604c324SMatthew Wilcox 		root_tag_clear(root, tag);
1034d604c324SMatthew Wilcox }
1035d604c324SMatthew Wilcox 
1036268f42deSMatthew Wilcox /**
10371da177e4SLinus Torvalds  *	radix_tree_tag_clear - clear a tag on a radix tree node
10381da177e4SLinus Torvalds  *	@root:		radix tree root
10391da177e4SLinus Torvalds  *	@index:		index key
10401da177e4SLinus Torvalds  *	@tag:		tag index
10411da177e4SLinus Torvalds  *
1042daff89f3SJonathan Corbet  *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
10432fcd9005SMatthew Wilcox  *	corresponding to @index in the radix tree.  If this causes
10442fcd9005SMatthew Wilcox  *	the leaf node to have no tags set then clear the tag in the
10451da177e4SLinus Torvalds  *	next-to-leaf node, etc.
10461da177e4SLinus Torvalds  *
10471da177e4SLinus Torvalds  *	Returns the address of the tagged item on success, else NULL.  ie:
10481da177e4SLinus Torvalds  *	has the same return value and semantics as radix_tree_lookup().
10491da177e4SLinus Torvalds  */
10501da177e4SLinus Torvalds void *radix_tree_tag_clear(struct radix_tree_root *root,
1051daff89f3SJonathan Corbet 			unsigned long index, unsigned int tag)
10521da177e4SLinus Torvalds {
105300f47b58SRoss Zwisler 	struct radix_tree_node *node, *parent;
105400f47b58SRoss Zwisler 	unsigned long maxindex;
1055e2bdb933SHugh Dickins 	int uninitialized_var(offset);
10561da177e4SLinus Torvalds 
10579e85d811SMatthew Wilcox 	radix_tree_load_root(root, &node, &maxindex);
105800f47b58SRoss Zwisler 	if (index > maxindex)
105900f47b58SRoss Zwisler 		return NULL;
10601da177e4SLinus Torvalds 
106100f47b58SRoss Zwisler 	parent = NULL;
10621da177e4SLinus Torvalds 
1063b194d16cSMatthew Wilcox 	while (radix_tree_is_internal_node(node)) {
10644dd6c098SMatthew Wilcox 		parent = entry_to_node(node);
10659e85d811SMatthew Wilcox 		offset = radix_tree_descend(parent, &node, index);
10661da177e4SLinus Torvalds 	}
10671da177e4SLinus Torvalds 
1068d604c324SMatthew Wilcox 	if (node)
1069d604c324SMatthew Wilcox 		node_tag_clear(root, parent, tag, offset);
10701da177e4SLinus Torvalds 
107100f47b58SRoss Zwisler 	return node;
10721da177e4SLinus Torvalds }
10731da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_tag_clear);
10741da177e4SLinus Torvalds 
10751da177e4SLinus Torvalds /**
107630b888baSMatthew Wilcox   * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
107730b888baSMatthew Wilcox   * @root: radix tree root
107830b888baSMatthew Wilcox   * @iter: iterator state
107930b888baSMatthew Wilcox   * @tag: tag to clear
108030b888baSMatthew Wilcox   */
108130b888baSMatthew Wilcox void radix_tree_iter_tag_clear(struct radix_tree_root *root,
108230b888baSMatthew Wilcox 			const struct radix_tree_iter *iter, unsigned int tag)
108330b888baSMatthew Wilcox {
108430b888baSMatthew Wilcox 	node_tag_clear(root, iter->node, tag, iter_offset(iter));
108530b888baSMatthew Wilcox }
108630b888baSMatthew Wilcox 
108730b888baSMatthew Wilcox /**
10881da177e4SLinus Torvalds  * radix_tree_tag_get - get a tag on a radix tree node
10891da177e4SLinus Torvalds  * @root:		radix tree root
10901da177e4SLinus Torvalds  * @index:		index key
1091daff89f3SJonathan Corbet  * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
10921da177e4SLinus Torvalds  *
109332605a18SMarcelo Tosatti  * Return values:
10941da177e4SLinus Torvalds  *
1095612d6c19SNick Piggin  *  0: tag not present or not set
1096612d6c19SNick Piggin  *  1: tag set
1097ce82653dSDavid Howells  *
1098ce82653dSDavid Howells  * Note that the return value of this function may not be relied on, even if
1099ce82653dSDavid Howells  * the RCU lock is held, unless tag modification and node deletion are excluded
1100ce82653dSDavid Howells  * from concurrency.
11011da177e4SLinus Torvalds  */
110235534c86SMatthew Wilcox int radix_tree_tag_get(const struct radix_tree_root *root,
1103daff89f3SJonathan Corbet 			unsigned long index, unsigned int tag)
11041da177e4SLinus Torvalds {
11054589ba6dSRoss Zwisler 	struct radix_tree_node *node, *parent;
11064589ba6dSRoss Zwisler 	unsigned long maxindex;
11071da177e4SLinus Torvalds 
1108612d6c19SNick Piggin 	if (!root_tag_get(root, tag))
1109612d6c19SNick Piggin 		return 0;
1110612d6c19SNick Piggin 
11119e85d811SMatthew Wilcox 	radix_tree_load_root(root, &node, &maxindex);
11124589ba6dSRoss Zwisler 	if (index > maxindex)
11134589ba6dSRoss Zwisler 		return 0;
11147cf9c2c7SNick Piggin 
1115b194d16cSMatthew Wilcox 	while (radix_tree_is_internal_node(node)) {
11169e85d811SMatthew Wilcox 		unsigned offset;
11174589ba6dSRoss Zwisler 
11184dd6c098SMatthew Wilcox 		parent = entry_to_node(node);
11199e85d811SMatthew Wilcox 		offset = radix_tree_descend(parent, &node, index);
11204589ba6dSRoss Zwisler 
11214589ba6dSRoss Zwisler 		if (!tag_get(parent, tag, offset))
11224589ba6dSRoss Zwisler 			return 0;
11234589ba6dSRoss Zwisler 		if (node == RADIX_TREE_RETRY)
11244589ba6dSRoss Zwisler 			break;
11251da177e4SLinus Torvalds 	}
11264589ba6dSRoss Zwisler 
11274589ba6dSRoss Zwisler 	return 1;
11281da177e4SLinus Torvalds }
11291da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_tag_get);
11301da177e4SLinus Torvalds 
1131148deab2SMatthew Wilcox /* Construct iter->tags bit-mask from node->tags[tag] array */
1132148deab2SMatthew Wilcox static void set_iter_tags(struct radix_tree_iter *iter,
1133148deab2SMatthew Wilcox 				struct radix_tree_node *node, unsigned offset,
1134148deab2SMatthew Wilcox 				unsigned tag)
1135148deab2SMatthew Wilcox {
1136148deab2SMatthew Wilcox 	unsigned tag_long = offset / BITS_PER_LONG;
1137148deab2SMatthew Wilcox 	unsigned tag_bit  = offset % BITS_PER_LONG;
1138148deab2SMatthew Wilcox 
11390a835c4fSMatthew Wilcox 	if (!node) {
11400a835c4fSMatthew Wilcox 		iter->tags = 1;
11410a835c4fSMatthew Wilcox 		return;
11420a835c4fSMatthew Wilcox 	}
11430a835c4fSMatthew Wilcox 
1144148deab2SMatthew Wilcox 	iter->tags = node->tags[tag][tag_long] >> tag_bit;
1145148deab2SMatthew Wilcox 
1146148deab2SMatthew Wilcox 	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
1147148deab2SMatthew Wilcox 	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
1148148deab2SMatthew Wilcox 		/* Pick tags from next element */
1149148deab2SMatthew Wilcox 		if (tag_bit)
1150148deab2SMatthew Wilcox 			iter->tags |= node->tags[tag][tag_long + 1] <<
1151148deab2SMatthew Wilcox 						(BITS_PER_LONG - tag_bit);
1152148deab2SMatthew Wilcox 		/* Clip chunk size, here only BITS_PER_LONG tags */
1153148deab2SMatthew Wilcox 		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
1154148deab2SMatthew Wilcox 	}
1155148deab2SMatthew Wilcox }
1156148deab2SMatthew Wilcox 
1157d7b62727SMatthew Wilcox void __rcu **radix_tree_iter_resume(void __rcu **slot,
1158d7b62727SMatthew Wilcox 					struct radix_tree_iter *iter)
1159148deab2SMatthew Wilcox {
1160148deab2SMatthew Wilcox 	slot++;
1161148deab2SMatthew Wilcox 	iter->index = __radix_tree_iter_add(iter, 1);
1162148deab2SMatthew Wilcox 	iter->next_index = iter->index;
1163148deab2SMatthew Wilcox 	iter->tags = 0;
1164148deab2SMatthew Wilcox 	return NULL;
1165148deab2SMatthew Wilcox }
1166148deab2SMatthew Wilcox EXPORT_SYMBOL(radix_tree_iter_resume);
1167148deab2SMatthew Wilcox 
/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node, *child;
	unsigned long index, offset, maxindex;

	/* A tagged walk finds nothing when the summary root tag is clear. */
	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL. iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

 restart:
	radix_tree_load_root(root, &child, &maxindex);
	if (index > maxindex)
		return NULL;
	if (!child)
		return NULL;

	if (!radix_tree_is_internal_node(child)) {
		/* Single-slot tree: the entry lives directly in the root. */
		iter->index = index;
		iter->next_index = maxindex + 1;
		iter->tags = 1;
		iter->node = NULL;
		return (void __rcu **)&root->xa_head;
	}

	/* Descend towards the leaf level, skipping over holes as we go. */
	do {
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);

		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!tag_get(node, tag, offset) : !child) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			/* Look for the next tagged/present slot in this node. */
			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(node, tag,
						offset + 1);
			else
				while (++offset	< RADIX_TREE_MAP_SIZE) {
					void *slot = rcu_dereference_raw(
							node->slots[offset]);
					if (slot)
						break;
				}
			/* Rebase @index onto the first index @offset covers. */
			index &= ~node_maxindex(node);
			index += offset << node->shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			/* Node exhausted: climb by restarting from the root. */
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if (!child)
			goto restart;
		/*
		 * A RETRY entry is handed to the caller as-is; callers detect
		 * it via radix_tree_is_internal_node() and use
		 * radix_tree_iter_retry() to revisit this index.
		 */
		if (child == RADIX_TREE_RETRY)
			break;
	} while (node->shift && radix_tree_is_internal_node(child));

	/* Update the iterator state */
	iter->index = (index &~ node_maxindex(node)) | offset;
	iter->next_index = (index | node_maxindex(node)) + 1;
	iter->node = node;

	if (flags & RADIX_TREE_ITER_TAGGED)
		set_iter_tags(iter, node, offset, tag);

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);
126278c1d784SKonstantin Khlebnikov 
126378c1d784SKonstantin Khlebnikov /**
12641da177e4SLinus Torvalds  *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
12651da177e4SLinus Torvalds  *	@root:		radix tree root
12661da177e4SLinus Torvalds  *	@results:	where the results of the lookup are placed
12671da177e4SLinus Torvalds  *	@first_index:	start the lookup from this key
12681da177e4SLinus Torvalds  *	@max_items:	place up to this many items at *results
12691da177e4SLinus Torvalds  *
12701da177e4SLinus Torvalds  *	Performs an index-ascending scan of the tree for present items.  Places
12711da177e4SLinus Torvalds  *	them at *@results and returns the number of items which were placed at
12721da177e4SLinus Torvalds  *	*@results.
12731da177e4SLinus Torvalds  *
12741da177e4SLinus Torvalds  *	The implementation is naive.
12757cf9c2c7SNick Piggin  *
12767cf9c2c7SNick Piggin  *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
12777cf9c2c7SNick Piggin  *	rcu_read_lock. In this case, rather than the returned results being
12782fcd9005SMatthew Wilcox  *	an atomic snapshot of the tree at a single point in time, the
12792fcd9005SMatthew Wilcox  *	semantics of an RCU protected gang lookup are as though multiple
12802fcd9005SMatthew Wilcox  *	radix_tree_lookups have been issued in individual locks, and results
12812fcd9005SMatthew Wilcox  *	stored in 'results'.
12821da177e4SLinus Torvalds  */
12831da177e4SLinus Torvalds unsigned int
128435534c86SMatthew Wilcox radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
12851da177e4SLinus Torvalds 			unsigned long first_index, unsigned int max_items)
12861da177e4SLinus Torvalds {
1287cebbd29eSKonstantin Khlebnikov 	struct radix_tree_iter iter;
1288d7b62727SMatthew Wilcox 	void __rcu **slot;
1289cebbd29eSKonstantin Khlebnikov 	unsigned int ret = 0;
12901da177e4SLinus Torvalds 
1291cebbd29eSKonstantin Khlebnikov 	if (unlikely(!max_items))
12927cf9c2c7SNick Piggin 		return 0;
12937cf9c2c7SNick Piggin 
1294cebbd29eSKonstantin Khlebnikov 	radix_tree_for_each_slot(slot, root, &iter, first_index) {
129546437f9aSMatthew Wilcox 		results[ret] = rcu_dereference_raw(*slot);
1296cebbd29eSKonstantin Khlebnikov 		if (!results[ret])
129747feff2cSNick Piggin 			continue;
1298b194d16cSMatthew Wilcox 		if (radix_tree_is_internal_node(results[ret])) {
129946437f9aSMatthew Wilcox 			slot = radix_tree_iter_retry(&iter);
130046437f9aSMatthew Wilcox 			continue;
130146437f9aSMatthew Wilcox 		}
1302cebbd29eSKonstantin Khlebnikov 		if (++ret == max_items)
13031da177e4SLinus Torvalds 			break;
13041da177e4SLinus Torvalds 	}
13057cf9c2c7SNick Piggin 
13061da177e4SLinus Torvalds 	return ret;
13071da177e4SLinus Torvalds }
13081da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_gang_lookup);
13091da177e4SLinus Torvalds 
131047feff2cSNick Piggin /**
13111da177e4SLinus Torvalds  *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
13121da177e4SLinus Torvalds  *	                             based on a tag
13131da177e4SLinus Torvalds  *	@root:		radix tree root
13141da177e4SLinus Torvalds  *	@results:	where the results of the lookup are placed
13151da177e4SLinus Torvalds  *	@first_index:	start the lookup from this key
13161da177e4SLinus Torvalds  *	@max_items:	place up to this many items at *results
1317daff89f3SJonathan Corbet  *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
13181da177e4SLinus Torvalds  *
13191da177e4SLinus Torvalds  *	Performs an index-ascending scan of the tree for present items which
13201da177e4SLinus Torvalds  *	have the tag indexed by @tag set.  Places the items at *@results and
13211da177e4SLinus Torvalds  *	returns the number of items which were placed at *@results.
13221da177e4SLinus Torvalds  */
13231da177e4SLinus Torvalds unsigned int
132435534c86SMatthew Wilcox radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
1325daff89f3SJonathan Corbet 		unsigned long first_index, unsigned int max_items,
1326daff89f3SJonathan Corbet 		unsigned int tag)
13271da177e4SLinus Torvalds {
1328cebbd29eSKonstantin Khlebnikov 	struct radix_tree_iter iter;
1329d7b62727SMatthew Wilcox 	void __rcu **slot;
1330cebbd29eSKonstantin Khlebnikov 	unsigned int ret = 0;
13311da177e4SLinus Torvalds 
1332cebbd29eSKonstantin Khlebnikov 	if (unlikely(!max_items))
1333612d6c19SNick Piggin 		return 0;
1334612d6c19SNick Piggin 
1335cebbd29eSKonstantin Khlebnikov 	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
133646437f9aSMatthew Wilcox 		results[ret] = rcu_dereference_raw(*slot);
1337cebbd29eSKonstantin Khlebnikov 		if (!results[ret])
133847feff2cSNick Piggin 			continue;
1339b194d16cSMatthew Wilcox 		if (radix_tree_is_internal_node(results[ret])) {
134046437f9aSMatthew Wilcox 			slot = radix_tree_iter_retry(&iter);
134146437f9aSMatthew Wilcox 			continue;
134246437f9aSMatthew Wilcox 		}
1343cebbd29eSKonstantin Khlebnikov 		if (++ret == max_items)
13441da177e4SLinus Torvalds 			break;
13451da177e4SLinus Torvalds 	}
13467cf9c2c7SNick Piggin 
13471da177e4SLinus Torvalds 	return ret;
13481da177e4SLinus Torvalds }
13491da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
13501da177e4SLinus Torvalds 
13511da177e4SLinus Torvalds /**
135247feff2cSNick Piggin  *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
135347feff2cSNick Piggin  *					  radix tree based on a tag
135447feff2cSNick Piggin  *	@root:		radix tree root
135547feff2cSNick Piggin  *	@results:	where the results of the lookup are placed
135647feff2cSNick Piggin  *	@first_index:	start the lookup from this key
135747feff2cSNick Piggin  *	@max_items:	place up to this many items at *results
135847feff2cSNick Piggin  *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
135947feff2cSNick Piggin  *
136047feff2cSNick Piggin  *	Performs an index-ascending scan of the tree for present items which
136147feff2cSNick Piggin  *	have the tag indexed by @tag set.  Places the slots at *@results and
136247feff2cSNick Piggin  *	returns the number of slots which were placed at *@results.
136347feff2cSNick Piggin  */
136447feff2cSNick Piggin unsigned int
136535534c86SMatthew Wilcox radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
1366d7b62727SMatthew Wilcox 		void __rcu ***results, unsigned long first_index,
136735534c86SMatthew Wilcox 		unsigned int max_items, unsigned int tag)
136847feff2cSNick Piggin {
1369cebbd29eSKonstantin Khlebnikov 	struct radix_tree_iter iter;
1370d7b62727SMatthew Wilcox 	void __rcu **slot;
1371cebbd29eSKonstantin Khlebnikov 	unsigned int ret = 0;
137247feff2cSNick Piggin 
1373cebbd29eSKonstantin Khlebnikov 	if (unlikely(!max_items))
137447feff2cSNick Piggin 		return 0;
137547feff2cSNick Piggin 
1376cebbd29eSKonstantin Khlebnikov 	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
1377cebbd29eSKonstantin Khlebnikov 		results[ret] = slot;
1378cebbd29eSKonstantin Khlebnikov 		if (++ret == max_items)
137947feff2cSNick Piggin 			break;
138047feff2cSNick Piggin 	}
138147feff2cSNick Piggin 
138247feff2cSNick Piggin 	return ret;
138347feff2cSNick Piggin }
138447feff2cSNick Piggin EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
138547feff2cSNick Piggin 
/*
 * __radix_tree_delete - clear @slot and fix up tags and node accounting.
 * @root: radix tree root
 * @node: node containing @slot (may be NULL for a root-level slot)
 * @slot: the slot to clear
 *
 * For an IDR, a cleared slot becomes allocatable again, so IDR_FREE is set;
 * for an ordinary radix tree all tags on the slot are cleared instead.
 * The slot itself is replaced with NULL (adjusting the node's entry count
 * and, if the old entry was a value entry, its value count).
 *
 * Returns true if clearing the slot caused @node to be freed.
 */
static bool __radix_tree_delete(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot)
{
	void *old = rcu_dereference_raw(*slot);
	/* Deleting a value entry must decrement the node's value count. */
	int values = xa_is_value(old) ? -1 : 0;
	unsigned offset = get_slot_offset(node, slot);
	int tag;

	/* IDRs track free slots via IDR_FREE; plain trees just drop tags. */
	if (is_idr(root))
		node_tag_set(root, node, IDR_FREE, offset);
	else
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			node_tag_clear(root, node, tag, offset);

	replace_slot(slot, NULL, node, -1, values);
	return node && delete_node(root, node);
}
14030ac398efSMatthew Wilcox 
14040ac398efSMatthew Wilcox /**
14050ac398efSMatthew Wilcox  * radix_tree_iter_delete - delete the entry at this iterator position
14060ac398efSMatthew Wilcox  * @root: radix tree root
14070ac398efSMatthew Wilcox  * @iter: iterator state
14080ac398efSMatthew Wilcox  * @slot: pointer to slot
14090ac398efSMatthew Wilcox  *
14100ac398efSMatthew Wilcox  * Delete the entry at the position currently pointed to by the iterator.
14110ac398efSMatthew Wilcox  * This may result in the current node being freed; if it is, the iterator
14120ac398efSMatthew Wilcox  * is advanced so that it will not reference the freed memory.  This
14130ac398efSMatthew Wilcox  * function may be called without any locking if there are no other threads
14140ac398efSMatthew Wilcox  * which can access this tree.
14150ac398efSMatthew Wilcox  */
14160ac398efSMatthew Wilcox void radix_tree_iter_delete(struct radix_tree_root *root,
1417d7b62727SMatthew Wilcox 				struct radix_tree_iter *iter, void __rcu **slot)
14180ac398efSMatthew Wilcox {
14190ac398efSMatthew Wilcox 	if (__radix_tree_delete(root, iter->node, slot))
14200ac398efSMatthew Wilcox 		iter->index = iter->next_index;
14210ac398efSMatthew Wilcox }
1422d1b48c1eSChris Wilson EXPORT_SYMBOL(radix_tree_iter_delete);
14230ac398efSMatthew Wilcox 
/**
 * radix_tree_delete_item - delete an item from a radix tree
 * @root: radix tree root
 * @index: index key
 * @item: expected item
 *
 * Remove @item at @index from the radix tree rooted at @root.
 *
 * Return: the deleted entry, or %NULL if it was not present
 * or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
			     unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL;
	void __rcu **slot = NULL;
	void *entry;

	entry = __radix_tree_lookup(root, index, &node, &slot);
	if (!slot)
		return NULL;
	/*
	 * A NULL entry is only deletable from an IDR whose slot is still
	 * allocated (IDR_FREE clear); otherwise there is nothing to delete.
	 */
	if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
						get_slot_offset(node, slot))))
		return NULL;

	/* Caller asked for a specific item; bail out if it does not match. */
	if (item && entry != item)
		return NULL;

	__radix_tree_delete(root, node, slot);

	return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);
145753c59f26SJohannes Weiner 
145853c59f26SJohannes Weiner /**
14590ac398efSMatthew Wilcox  * radix_tree_delete - delete an entry from a radix tree
146053c59f26SJohannes Weiner  * @root: radix tree root
146153c59f26SJohannes Weiner  * @index: index key
146253c59f26SJohannes Weiner  *
14630ac398efSMatthew Wilcox  * Remove the entry at @index from the radix tree rooted at @root.
146453c59f26SJohannes Weiner  *
14650ac398efSMatthew Wilcox  * Return: The deleted entry, or %NULL if it was not present.
146653c59f26SJohannes Weiner  */
146753c59f26SJohannes Weiner void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
146853c59f26SJohannes Weiner {
146953c59f26SJohannes Weiner 	return radix_tree_delete_item(root, index, NULL);
147053c59f26SJohannes Weiner }
14711da177e4SLinus Torvalds EXPORT_SYMBOL(radix_tree_delete);
14721da177e4SLinus Torvalds 
/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 *
 *	Returns nonzero if any item in the tree has @tag set.
 */
int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
{
	/* The root tag is a summary of the tags on all slots below it. */
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);
14831da177e4SLinus Torvalds 
/**
 * idr_preload - preload for idr_alloc()
 * @gfp_mask: allocation mask to use for preloading
 *
 * Preallocate memory to use for the next call to idr_alloc().  This function
 * returns with preemption disabled.  It will be enabled by idr_preload_end().
 */
void idr_preload(gfp_t gfp_mask)
{
	/*
	 * On success __radix_tree_preload() returns 0 with preemption already
	 * disabled.  On failure it returns nonzero with preemption enabled,
	 * so disable it here to honour this function's contract either way.
	 */
	if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
		preempt_disable();
}
EXPORT_SYMBOL(idr_preload);
14970a835c4fSMatthew Wilcox 
/*
 * idr_get_free - find the first free slot in an IDR
 * @root: radix tree root backing the IDR
 * @iter: iterator state; the search starts at iter->next_index and on
 *	success @iter is left describing the found slot
 * @gfp: allocation mask used when the tree needs to grow
 * @max: highest acceptable index
 *
 * Walks the tree looking for a slot with the IDR_FREE tag set, extending
 * and deepening the tree as required.  Returns a pointer to the free slot,
 * ERR_PTR(-ENOSPC) if no free index <= @max exists, or ERR_PTR(-ENOMEM)
 * if a node allocation fails.
 */
void __rcu **idr_get_free(struct radix_tree_root *root,
			      struct radix_tree_iter *iter, gfp_t gfp,
			      unsigned long max)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex, start = iter->next_index;
	unsigned int shift, offset = 0;

 grow:
	shift = radix_tree_load_root(root, &child, &maxindex);
	/* No IDR_FREE at the root: everything up to maxindex is taken. */
	if (!radix_tree_tagged(root, IDR_FREE))
		start = max(start, maxindex + 1);
	if (start > max)
		return ERR_PTR(-ENOSPC);

	/* Grow the tree until it can cover index @start. */
	if (start > maxindex) {
		int error = radix_tree_extend(root, gfp, start, shift);
		if (error < 0)
			return ERR_PTR(error);
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}
	if (start == 0 && shift == 0)
		shift = RADIX_TREE_MAP_SHIFT;

	/* Descend, allocating interior nodes where the path is missing. */
	while (shift) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node.  */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return ERR_PTR(-ENOMEM);
			/* A brand-new node is entirely free. */
			all_tag_set(child, IDR_FREE);
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, start);
		if (!tag_get(node, IDR_FREE, offset)) {
			/* This subtree is full; find the next free sibling. */
			offset = radix_tree_find_next_bit(node, IDR_FREE,
							offset + 1);
			start = next_index(start, node, offset);
			if (start > max)
				return ERR_PTR(-ENOSPC);
			/* Node exhausted: climb until a parent has room. */
			while (offset == RADIX_TREE_MAP_SIZE) {
				offset = node->offset + 1;
				node = node->parent;
				if (!node)
					goto grow;
				shift = node->shift;
			}
			child = rcu_dereference_raw(node->slots[offset]);
		}
		slot = &node->slots[offset];
	}

	/* Describe the found slot (and its chunk) in the iterator. */
	iter->index = start;
	if (node)
		iter->next_index = 1 + min(max, (start | node_maxindex(node)));
	else
		iter->next_index = 1;
	iter->node = node;
	set_iter_tags(iter, node, offset, IDR_FREE);

	return slot;
}
15690a835c4fSMatthew Wilcox 
/**
 * idr_destroy - release all internal memory from an IDR
 * @idr: idr handle
 *
 * After this function is called, the IDR is empty, and may be reused or
 * the data structure containing it may be freed.
 *
 * A typical clean-up sequence for objects stored in an idr tree will use
 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
 * free the memory used to keep track of those objects.
 */
void idr_destroy(struct idr *idr)
{
	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
	/* Only an internal node owns further nodes that need freeing. */
	if (radix_tree_is_internal_node(node))
		radix_tree_free_nodes(node);
	/* Detach the head and mark the whole ID space free again. */
	idr->idr_rt.xa_head = NULL;
	root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);
15900a835c4fSMatthew Wilcox 
/*
 * Slab constructor: every node starts out zeroed with its private_list
 * initialised to an empty list.
 */
static void
radix_tree_node_ctor(void *arg)
{
	struct radix_tree_node *node = arg;

	memset(node, 0, sizeof(*node));
	INIT_LIST_HEAD(&node->private_list);
}
15991da177e4SLinus Torvalds 
1600d544abd5SSebastian Andrzej Siewior static int radix_tree_cpu_dead(unsigned int cpu)
16011da177e4SLinus Torvalds {
16021da177e4SLinus Torvalds 	struct radix_tree_preload *rtp;
16039d2a8da0SKirill A. Shutemov 	struct radix_tree_node *node;
16041da177e4SLinus Torvalds 
16052fcd9005SMatthew Wilcox 	/* Free per-cpu pool of preloaded nodes */
16061da177e4SLinus Torvalds 	rtp = &per_cpu(radix_tree_preloads, cpu);
16071da177e4SLinus Torvalds 	while (rtp->nr) {
16089d2a8da0SKirill A. Shutemov 		node = rtp->nodes;
16091293d5c5SMatthew Wilcox 		rtp->nodes = node->parent;
16109d2a8da0SKirill A. Shutemov 		kmem_cache_free(radix_tree_node_cachep, node);
16111da177e4SLinus Torvalds 		rtp->nr--;
16121da177e4SLinus Torvalds 	}
1613d544abd5SSebastian Andrzej Siewior 	return 0;
16141da177e4SLinus Torvalds }
16151da177e4SLinus Torvalds 
/* One-time setup: compile-time checks, node slab cache, hotplug hook. */
void __init radix_tree_init(void)
{
	int ret;

	/*
	 * Compile-time checks of the packing assumptions.
	 * NOTE(review): the tag/IDR bits appear to share a word with the
	 * root's gfp bits — confirm against the root_tag_*()/is_idr()
	 * helpers elsewhere in this file.
	 */
	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
	BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
	/* Nodes come from the ctor pre-zeroed with private_list initialised. */
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	/* Free each CPU's preload pool when that CPU goes offline. */
	ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
					NULL, radix_tree_cpu_dead);
	WARN_ON(ret < 0);
}
1631