1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21366c37eSMatthew Wilcox /*
31366c37eSMatthew Wilcox  * Regression1
41366c37eSMatthew Wilcox  * Description:
51366c37eSMatthew Wilcox  * Salman Qazi describes the following radix-tree bug:
61366c37eSMatthew Wilcox  *
71366c37eSMatthew Wilcox  * In the following case, we get can get a deadlock:
81366c37eSMatthew Wilcox  *
91366c37eSMatthew Wilcox  * 0.  The radix tree contains two items, one has the index 0.
101366c37eSMatthew Wilcox  * 1.  The reader (in this case find_get_pages) takes the rcu_read_lock.
111366c37eSMatthew Wilcox  * 2.  The reader acquires slot(s) for item(s) including the index 0 item.
121366c37eSMatthew Wilcox  * 3.  The non-zero index item is deleted, and as a consequence the other item
131366c37eSMatthew Wilcox  *     is moved to the root of the tree. The place where it used to be is queued
141366c37eSMatthew Wilcox  *     for deletion after the readers finish.
151366c37eSMatthew Wilcox  * 3b. The zero item is deleted, removing it from the direct slot, it remains in
161366c37eSMatthew Wilcox  *     the rcu-delayed indirect node.
171366c37eSMatthew Wilcox  * 4.  The reader looks at the index 0 slot, and finds that the page has 0 ref
181366c37eSMatthew Wilcox  *     count
191366c37eSMatthew Wilcox  * 5.  The reader looks at it again, hoping that the item will either be freed
201366c37eSMatthew Wilcox  *     or the ref count will increase. This never happens, as the slot it is
211366c37eSMatthew Wilcox  *     looking at will never be updated. Also, this slot can never be reclaimed
221366c37eSMatthew Wilcox  *     because the reader is holding rcu_read_lock and is in an infinite loop.
231366c37eSMatthew Wilcox  *
241366c37eSMatthew Wilcox  * The fix is to re-use the same "indirect" pointer case that requires a slot
251366c37eSMatthew Wilcox  * lookup retry into a general "retry the lookup" bit.
261366c37eSMatthew Wilcox  *
271366c37eSMatthew Wilcox  * Running:
281366c37eSMatthew Wilcox  * This test should run to completion in a few seconds. The above bug would
291366c37eSMatthew Wilcox  * cause it to hang indefinitely.
301366c37eSMatthew Wilcox  *
311366c37eSMatthew Wilcox  * Upstream commit:
321366c37eSMatthew Wilcox  * Not yet
331366c37eSMatthew Wilcox  */
341366c37eSMatthew Wilcox #include <linux/kernel.h>
351366c37eSMatthew Wilcox #include <linux/gfp.h>
361366c37eSMatthew Wilcox #include <linux/slab.h>
371366c37eSMatthew Wilcox #include <linux/radix-tree.h>
381366c37eSMatthew Wilcox #include <linux/rcupdate.h>
391366c37eSMatthew Wilcox #include <stdlib.h>
401366c37eSMatthew Wilcox #include <pthread.h>
411366c37eSMatthew Wilcox #include <stdio.h>
421366c37eSMatthew Wilcox #include <assert.h>
431366c37eSMatthew Wilcox 
441366c37eSMatthew Wilcox #include "regression.h"
451366c37eSMatthew Wilcox 
461366c37eSMatthew Wilcox static RADIX_TREE(mt_tree, GFP_KERNEL);
471366c37eSMatthew Wilcox 
481366c37eSMatthew Wilcox struct page {
491366c37eSMatthew Wilcox 	pthread_mutex_t lock;
501366c37eSMatthew Wilcox 	struct rcu_head rcu;
511366c37eSMatthew Wilcox 	int count;
521366c37eSMatthew Wilcox 	unsigned long index;
531366c37eSMatthew Wilcox };
541366c37eSMatthew Wilcox 
page_alloc(int index)55a332125fSMatthew Wilcox static struct page *page_alloc(int index)
561366c37eSMatthew Wilcox {
571366c37eSMatthew Wilcox 	struct page *p;
581366c37eSMatthew Wilcox 	p = malloc(sizeof(struct page));
591366c37eSMatthew Wilcox 	p->count = 1;
60a332125fSMatthew Wilcox 	p->index = index;
611366c37eSMatthew Wilcox 	pthread_mutex_init(&p->lock, NULL);
621366c37eSMatthew Wilcox 
631366c37eSMatthew Wilcox 	return p;
641366c37eSMatthew Wilcox }
651366c37eSMatthew Wilcox 
page_rcu_free(struct rcu_head * rcu)661366c37eSMatthew Wilcox static void page_rcu_free(struct rcu_head *rcu)
671366c37eSMatthew Wilcox {
681366c37eSMatthew Wilcox 	struct page *p = container_of(rcu, struct page, rcu);
691366c37eSMatthew Wilcox 	assert(!p->count);
701366c37eSMatthew Wilcox 	pthread_mutex_destroy(&p->lock);
711366c37eSMatthew Wilcox 	free(p);
721366c37eSMatthew Wilcox }
731366c37eSMatthew Wilcox 
page_free(struct page * p)741366c37eSMatthew Wilcox static void page_free(struct page *p)
751366c37eSMatthew Wilcox {
761366c37eSMatthew Wilcox 	call_rcu(&p->rcu, page_rcu_free);
771366c37eSMatthew Wilcox }
781366c37eSMatthew Wilcox 
find_get_pages(unsigned long start,unsigned int nr_pages,struct page ** pages)791366c37eSMatthew Wilcox static unsigned find_get_pages(unsigned long start,
801366c37eSMatthew Wilcox 			    unsigned int nr_pages, struct page **pages)
811366c37eSMatthew Wilcox {
82a332125fSMatthew Wilcox 	XA_STATE(xas, &mt_tree, start);
83a332125fSMatthew Wilcox 	struct page *page;
84a332125fSMatthew Wilcox 	unsigned int ret = 0;
851366c37eSMatthew Wilcox 
861366c37eSMatthew Wilcox 	rcu_read_lock();
87a332125fSMatthew Wilcox 	xas_for_each(&xas, page, ULONG_MAX) {
88a332125fSMatthew Wilcox 		if (xas_retry(&xas, page))
891366c37eSMatthew Wilcox 			continue;
901366c37eSMatthew Wilcox 
911366c37eSMatthew Wilcox 		pthread_mutex_lock(&page->lock);
92a332125fSMatthew Wilcox 		if (!page->count)
93a332125fSMatthew Wilcox 			goto unlock;
94a332125fSMatthew Wilcox 
951366c37eSMatthew Wilcox 		/* don't actually update page refcount */
961366c37eSMatthew Wilcox 		pthread_mutex_unlock(&page->lock);
971366c37eSMatthew Wilcox 
981366c37eSMatthew Wilcox 		/* Has the page moved? */
99a332125fSMatthew Wilcox 		if (unlikely(page != xas_reload(&xas)))
100a332125fSMatthew Wilcox 			goto put_page;
1011366c37eSMatthew Wilcox 
1021366c37eSMatthew Wilcox 		pages[ret] = page;
1031366c37eSMatthew Wilcox 		ret++;
104a332125fSMatthew Wilcox 		continue;
105a332125fSMatthew Wilcox unlock:
106a332125fSMatthew Wilcox 		pthread_mutex_unlock(&page->lock);
107a332125fSMatthew Wilcox put_page:
108a332125fSMatthew Wilcox 		xas_reset(&xas);
1091366c37eSMatthew Wilcox 	}
1101366c37eSMatthew Wilcox 	rcu_read_unlock();
1111366c37eSMatthew Wilcox 	return ret;
1121366c37eSMatthew Wilcox }
1131366c37eSMatthew Wilcox 
1141366c37eSMatthew Wilcox static pthread_barrier_t worker_barrier;
1151366c37eSMatthew Wilcox 
regression1_fn(void * arg)1161366c37eSMatthew Wilcox static void *regression1_fn(void *arg)
1171366c37eSMatthew Wilcox {
1181366c37eSMatthew Wilcox 	rcu_register_thread();
1191366c37eSMatthew Wilcox 
1201366c37eSMatthew Wilcox 	if (pthread_barrier_wait(&worker_barrier) ==
1211366c37eSMatthew Wilcox 			PTHREAD_BARRIER_SERIAL_THREAD) {
1221366c37eSMatthew Wilcox 		int j;
1231366c37eSMatthew Wilcox 
1241366c37eSMatthew Wilcox 		for (j = 0; j < 1000000; j++) {
1251366c37eSMatthew Wilcox 			struct page *p;
1261366c37eSMatthew Wilcox 
127a332125fSMatthew Wilcox 			p = page_alloc(0);
128372266baSMatthew Wilcox 			xa_lock(&mt_tree);
1291366c37eSMatthew Wilcox 			radix_tree_insert(&mt_tree, 0, p);
130372266baSMatthew Wilcox 			xa_unlock(&mt_tree);
1311366c37eSMatthew Wilcox 
132a332125fSMatthew Wilcox 			p = page_alloc(1);
133372266baSMatthew Wilcox 			xa_lock(&mt_tree);
1341366c37eSMatthew Wilcox 			radix_tree_insert(&mt_tree, 1, p);
135372266baSMatthew Wilcox 			xa_unlock(&mt_tree);
1361366c37eSMatthew Wilcox 
137372266baSMatthew Wilcox 			xa_lock(&mt_tree);
1381366c37eSMatthew Wilcox 			p = radix_tree_delete(&mt_tree, 1);
1391366c37eSMatthew Wilcox 			pthread_mutex_lock(&p->lock);
1401366c37eSMatthew Wilcox 			p->count--;
1411366c37eSMatthew Wilcox 			pthread_mutex_unlock(&p->lock);
142372266baSMatthew Wilcox 			xa_unlock(&mt_tree);
1431366c37eSMatthew Wilcox 			page_free(p);
1441366c37eSMatthew Wilcox 
145372266baSMatthew Wilcox 			xa_lock(&mt_tree);
1461366c37eSMatthew Wilcox 			p = radix_tree_delete(&mt_tree, 0);
1471366c37eSMatthew Wilcox 			pthread_mutex_lock(&p->lock);
1481366c37eSMatthew Wilcox 			p->count--;
1491366c37eSMatthew Wilcox 			pthread_mutex_unlock(&p->lock);
150372266baSMatthew Wilcox 			xa_unlock(&mt_tree);
1511366c37eSMatthew Wilcox 			page_free(p);
1521366c37eSMatthew Wilcox 		}
1531366c37eSMatthew Wilcox 	} else {
1541366c37eSMatthew Wilcox 		int j;
1551366c37eSMatthew Wilcox 
1561366c37eSMatthew Wilcox 		for (j = 0; j < 100000000; j++) {
1571366c37eSMatthew Wilcox 			struct page *pages[10];
1581366c37eSMatthew Wilcox 
1591366c37eSMatthew Wilcox 			find_get_pages(0, 10, pages);
1601366c37eSMatthew Wilcox 		}
1611366c37eSMatthew Wilcox 	}
1621366c37eSMatthew Wilcox 
1631366c37eSMatthew Wilcox 	rcu_unregister_thread();
1641366c37eSMatthew Wilcox 
1651366c37eSMatthew Wilcox 	return NULL;
1661366c37eSMatthew Wilcox }
1671366c37eSMatthew Wilcox 
1681366c37eSMatthew Wilcox static pthread_t *threads;
regression1_test(void)1691366c37eSMatthew Wilcox void regression1_test(void)
1701366c37eSMatthew Wilcox {
1711366c37eSMatthew Wilcox 	int nr_threads;
1721366c37eSMatthew Wilcox 	int i;
1731366c37eSMatthew Wilcox 	long arg;
1741366c37eSMatthew Wilcox 
1751366c37eSMatthew Wilcox 	/* Regression #1 */
17673bc029bSRehas Sachdeva 	printv(1, "running regression test 1, should finish in under a minute\n");
1771366c37eSMatthew Wilcox 	nr_threads = 2;
1781366c37eSMatthew Wilcox 	pthread_barrier_init(&worker_barrier, NULL, nr_threads);
1791366c37eSMatthew Wilcox 
180*cac7ea57SColin Ian King 	threads = malloc(nr_threads * sizeof(*threads));
1811366c37eSMatthew Wilcox 
1821366c37eSMatthew Wilcox 	for (i = 0; i < nr_threads; i++) {
1831366c37eSMatthew Wilcox 		arg = i;
1841366c37eSMatthew Wilcox 		if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
1851366c37eSMatthew Wilcox 			perror("pthread_create");
1861366c37eSMatthew Wilcox 			exit(1);
1871366c37eSMatthew Wilcox 		}
1881366c37eSMatthew Wilcox 	}
1891366c37eSMatthew Wilcox 
1901366c37eSMatthew Wilcox 	for (i = 0; i < nr_threads; i++) {
1911366c37eSMatthew Wilcox 		if (pthread_join(threads[i], NULL)) {
1921366c37eSMatthew Wilcox 			perror("pthread_join");
1931366c37eSMatthew Wilcox 			exit(1);
1941366c37eSMatthew Wilcox 		}
1951366c37eSMatthew Wilcox 	}
1961366c37eSMatthew Wilcox 
1971366c37eSMatthew Wilcox 	free(threads);
1981366c37eSMatthew Wilcox 
19973bc029bSRehas Sachdeva 	printv(1, "regression test 1, done\n");
2001366c37eSMatthew Wilcox }
201