1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Regression1
4  * Description:
5  * Salman Qazi describes the following radix-tree bug:
6  *
 * In the following case, we can get a deadlock:
8  *
9  * 0.  The radix tree contains two items, one has the index 0.
10  * 1.  The reader (in this case find_get_pages) takes the rcu_read_lock.
11  * 2.  The reader acquires slot(s) for item(s) including the index 0 item.
12  * 3.  The non-zero index item is deleted, and as a consequence the other item
13  *     is moved to the root of the tree. The place where it used to be is queued
14  *     for deletion after the readers finish.
15  * 3b. The zero item is deleted, removing it from the direct slot, it remains in
16  *     the rcu-delayed indirect node.
17  * 4.  The reader looks at the index 0 slot, and finds that the page has 0 ref
18  *     count
19  * 5.  The reader looks at it again, hoping that the item will either be freed
20  *     or the ref count will increase. This never happens, as the slot it is
21  *     looking at will never be updated. Also, this slot can never be reclaimed
22  *     because the reader is holding rcu_read_lock and is in an infinite loop.
23  *
24  * The fix is to re-use the same "indirect" pointer case that requires a slot
25  * lookup retry into a general "retry the lookup" bit.
26  *
27  * Running:
28  * This test should run to completion in a few seconds. The above bug would
29  * cause it to hang indefinitely.
30  *
31  * Upstream commit:
32  * Not yet
33  */
34 #include <linux/kernel.h>
35 #include <linux/gfp.h>
36 #include <linux/slab.h>
37 #include <linux/radix-tree.h>
38 #include <linux/rcupdate.h>
39 #include <stdlib.h>
40 #include <pthread.h>
41 #include <stdio.h>
42 #include <assert.h>
43 
44 #include "regression.h"
45 
/* Shared tree under test; mt_lock serializes all writers (readers use RCU). */
static RADIX_TREE(mt_tree, GFP_KERNEL);
static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
48 
/* Minimal stand-in for a pagecache page: just a refcount and an index. */
struct page {
	pthread_mutex_t lock;	/* protects count */
	struct rcu_head rcu;	/* for deferred free via call_rcu() */
	int count;		/* simulated refcount; 0 == being freed */
	unsigned long index;	/* set at alloc; not read by this test */
};
55 
56 static struct page *page_alloc(void)
57 {
58 	struct page *p;
59 	p = malloc(sizeof(struct page));
60 	p->count = 1;
61 	p->index = 1;
62 	pthread_mutex_init(&p->lock, NULL);
63 
64 	return p;
65 }
66 
67 static void page_rcu_free(struct rcu_head *rcu)
68 {
69 	struct page *p = container_of(rcu, struct page, rcu);
70 	assert(!p->count);
71 	pthread_mutex_destroy(&p->lock);
72 	free(p);
73 }
74 
75 static void page_free(struct page *p)
76 {
77 	call_rcu(&p->rcu, page_rcu_free);
78 }
79 
/*
 * Mimic mm/filemap.c:find_get_pages(): under rcu_read_lock(), gang-look up
 * to nr_pages slots starting at index start, copy the live pages into
 * pages[], and return how many were found.  Stale or moved slots are
 * retried; a retry entry (entry moving into/out of the root) restarts the
 * whole lookup.  This is the reader side that livelocked with the bug
 * described in the file header.
 */
static unsigned find_get_pages(unsigned long start,
			    unsigned int nr_pages, struct page **pages)
{
	unsigned int i;
	unsigned int ret;
	unsigned int nr_found;

	rcu_read_lock();
restart:
	nr_found = radix_tree_gang_lookup_slot(&mt_tree,
				(void ***)pages, NULL, start, nr_pages);
	ret = 0;
	for (i = 0; i < nr_found; i++) {
		struct page *page;
repeat:
		page = radix_tree_deref_slot((void **)pages[i]);
		/* Slot was emptied after the gang lookup: skip it. */
		if (unlikely(!page))
			continue;

		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				/*
				 * Transient condition which can only trigger
				 * when entry at index 0 moves out of or back
				 * to root: none yet gotten, safe to restart.
				 */
				assert((start | i) == 0);
				goto restart;
			}
			/*
			 * No exceptional entries are inserted in this test.
			 */
			assert(0);
		}

		/*
		 * count == 0 means the page is mid-free: re-read the slot
		 * instead of using it.  With the buggy radix tree the slot
		 * contents never changed, turning this retry into the
		 * infinite loop this test exists to catch.
		 */
		pthread_mutex_lock(&page->lock);
		if (!page->count) {
			pthread_mutex_unlock(&page->lock);
			goto repeat;
		}
		/* don't actually update page refcount */
		pthread_mutex_unlock(&page->lock);

		/* Has the page moved? */
		if (unlikely(page != *((void **)pages[i]))) {
			goto repeat;
		}

		pages[ret] = page;
		ret++;
	}
	rcu_read_unlock();
	return ret;
}
134 
/* Makes both workers enter their loops at the same moment. */
static pthread_barrier_t worker_barrier;

/*
 * Worker body (arg is unused; roles are decided by the barrier).  Exactly
 * one thread -- the barrier's PTHREAD_BARRIER_SERIAL_THREAD winner --
 * becomes the writer, repeatedly inserting pages at indices 0 and 1 and
 * deleting them again; the other thread becomes the reader, hammering
 * find_get_pages().  With the radix-tree bug present the reader livelocks.
 */
static void *regression1_fn(void *arg)
{
	rcu_register_thread();

	if (pthread_barrier_wait(&worker_barrier) ==
			PTHREAD_BARRIER_SERIAL_THREAD) {
		int j;

		for (j = 0; j < 1000000; j++) {
			struct page *p;

			p = page_alloc();
			pthread_mutex_lock(&mt_lock);
			radix_tree_insert(&mt_tree, 0, p);
			pthread_mutex_unlock(&mt_lock);

			p = page_alloc();
			pthread_mutex_lock(&mt_lock);
			radix_tree_insert(&mt_tree, 1, p);
			pthread_mutex_unlock(&mt_lock);

			/*
			 * Delete index 1 first: this collapses the remaining
			 * index-0 entry back to the tree root (step 3 in the
			 * file header).
			 */
			pthread_mutex_lock(&mt_lock);
			p = radix_tree_delete(&mt_tree, 1);
			pthread_mutex_lock(&p->lock);
			p->count--;
			pthread_mutex_unlock(&p->lock);
			pthread_mutex_unlock(&mt_lock);
			page_free(p);

			/* Then delete index 0 (step 3b in the file header). */
			pthread_mutex_lock(&mt_lock);
			p = radix_tree_delete(&mt_tree, 0);
			pthread_mutex_lock(&p->lock);
			p->count--;
			pthread_mutex_unlock(&p->lock);
			pthread_mutex_unlock(&mt_lock);
			page_free(p);
		}
	} else {
		int j;

		for (j = 0; j < 100000000; j++) {
			struct page *pages[10];

			find_get_pages(0, 10, pages);
		}
	}

	rcu_unregister_thread();

	return NULL;
}
188 
189 static pthread_t *threads;
190 void regression1_test(void)
191 {
192 	int nr_threads;
193 	int i;
194 	long arg;
195 
196 	/* Regression #1 */
197 	printv(1, "running regression test 1, should finish in under a minute\n");
198 	nr_threads = 2;
199 	pthread_barrier_init(&worker_barrier, NULL, nr_threads);
200 
201 	threads = malloc(nr_threads * sizeof(pthread_t *));
202 
203 	for (i = 0; i < nr_threads; i++) {
204 		arg = i;
205 		if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
206 			perror("pthread_create");
207 			exit(1);
208 		}
209 	}
210 
211 	for (i = 0; i < nr_threads; i++) {
212 		if (pthread_join(threads[i], NULL)) {
213 			perror("pthread_join");
214 			exit(1);
215 		}
216 	}
217 
218 	free(threads);
219 
220 	printv(1, "regression test 1, done\n");
221 }
222