1 /*
2  * Regression1
3  * Description:
4  * Salman Qazi describes the following radix-tree bug:
5  *
 * In the following case, we can get a deadlock:
7  *
8  * 0.  The radix tree contains two items, one has the index 0.
9  * 1.  The reader (in this case find_get_pages) takes the rcu_read_lock.
10  * 2.  The reader acquires slot(s) for item(s) including the index 0 item.
11  * 3.  The non-zero index item is deleted, and as a consequence the other item
12  *     is moved to the root of the tree. The place where it used to be is queued
13  *     for deletion after the readers finish.
14  * 3b. The zero item is deleted, removing it from the direct slot, it remains in
15  *     the rcu-delayed indirect node.
16  * 4.  The reader looks at the index 0 slot, and finds that the page has 0 ref
17  *     count
18  * 5.  The reader looks at it again, hoping that the item will either be freed
19  *     or the ref count will increase. This never happens, as the slot it is
20  *     looking at will never be updated. Also, this slot can never be reclaimed
21  *     because the reader is holding rcu_read_lock and is in an infinite loop.
22  *
23  * The fix is to re-use the same "indirect" pointer case that requires a slot
24  * lookup retry into a general "retry the lookup" bit.
25  *
26  * Running:
27  * This test should run to completion in a few seconds. The above bug would
28  * cause it to hang indefinitely.
29  *
30  * Upstream commit:
31  * Not yet
32  */
33 #include <linux/kernel.h>
34 #include <linux/gfp.h>
35 #include <linux/slab.h>
36 #include <linux/radix-tree.h>
37 #include <linux/rcupdate.h>
38 #include <stdlib.h>
39 #include <pthread.h>
40 #include <stdio.h>
41 #include <assert.h>
42 
43 #include "regression.h"
44 
/* Radix tree shared by the writer and reader threads of this test. */
static RADIX_TREE(mt_tree, GFP_KERNEL);
/*
 * Held around every insertion into and deletion from mt_tree. Statically
 * initialised: POSIX does not promise that a zero-filled pthread_mutex_t
 * is a valid unlocked mutex.
 */
static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
47 
/*
 * Minimal page object stored in the radix tree by this test.
 */
struct page {
	pthread_mutex_t lock;	/* held around reads and writes of count */
	struct rcu_head rcu;	/* passed to call_rcu() by page_free() */
	int count;		/* 1 after page_alloc(); must be 0 when freed */
	unsigned long index;	/* set to 1 by page_alloc(); not otherwise used in the code shown here */
};
54 
55 static struct page *page_alloc(void)
56 {
57 	struct page *p;
58 	p = malloc(sizeof(struct page));
59 	p->count = 1;
60 	p->index = 1;
61 	pthread_mutex_init(&p->lock, NULL);
62 
63 	return p;
64 }
65 
66 static void page_rcu_free(struct rcu_head *rcu)
67 {
68 	struct page *p = container_of(rcu, struct page, rcu);
69 	assert(!p->count);
70 	pthread_mutex_destroy(&p->lock);
71 	free(p);
72 }
73 
74 static void page_free(struct page *p)
75 {
76 	call_rcu(&p->rcu, page_rcu_free);
77 }
78 
/*
 * find_get_pages - lockless gang lookup of pages in mt_tree
 * @start:	index handed to radix_tree_gang_lookup_slot() as the starting point
 * @nr_pages:	item limit handed to radix_tree_gang_lookup_slot()
 * @pages:	array used first for the slot pointers returned by the lookup
 *		and then, compacted in place, for the resulting page pointers
 *
 * The whole function runs inside one rcu_read_lock()/rcu_read_unlock()
 * section. Empty slots are skipped. A page whose count is zero, or whose
 * slot no longer points at it, causes the slot to be re-read; a retry entry
 * causes the entire lookup to be restarted.
 *
 * Returns the number of page pointers stored at the front of @pages.
 */
static unsigned find_get_pages(unsigned long start,
			    unsigned int nr_pages, struct page **pages)
{
	unsigned int i;
	unsigned int ret;
	unsigned int nr_found;

	rcu_read_lock();
restart:
	/* The lookup fills pages[] with slot pointers, hence the cast. */
	nr_found = radix_tree_gang_lookup_slot(&mt_tree,
				(void ***)pages, NULL, start, nr_pages);
	/* Reset on every (re)start so earlier partial results are discarded. */
	ret = 0;
	for (i = 0; i < nr_found; i++) {
		struct page *page;
repeat:
		/* pages[i] still holds a slot pointer here, not a page. */
		page = radix_tree_deref_slot((void **)pages[i]);
		if (unlikely(!page))
			continue;

		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				/*
				 * Transient condition which can only trigger
				 * when entry at index 0 moves out of or back
				 * to root: none yet gotten, safe to restart.
				 */
				assert((start | i) == 0);
				goto restart;
			}
			/*
			 * No exceptional entries are inserted in this test.
			 */
			assert(0);
		}

		/*
		 * count is only examined under the page lock. The writer
		 * thread decrements it under the same lock after deleting
		 * the page from the tree, so zero means the page is on its
		 * way out: go back and re-read the slot.
		 */
		pthread_mutex_lock(&page->lock);
		if (!page->count) {
			pthread_mutex_unlock(&page->lock);
			goto repeat;
		}
		/* don't actually update page refcount */
		pthread_mutex_unlock(&page->lock);

		/* Has the page moved? */
		if (unlikely(page != *((void **)pages[i]))) {
			goto repeat;
		}

		/*
		 * ret never exceeds i, so this store cannot clobber a slot
		 * pointer that has not been processed yet.
		 */
		pages[ret] = page;
		ret++;
	}
	rcu_read_unlock();
	return ret;
}
133 
/*
 * Barrier the worker threads wait on before starting; the thread that gets
 * PTHREAD_BARRIER_SERIAL_THREAD back becomes the writer.
 */
static pthread_barrier_t worker_barrier;
135 
136 static void *regression1_fn(void *arg)
137 {
138 	rcu_register_thread();
139 
140 	if (pthread_barrier_wait(&worker_barrier) ==
141 			PTHREAD_BARRIER_SERIAL_THREAD) {
142 		int j;
143 
144 		for (j = 0; j < 1000000; j++) {
145 			struct page *p;
146 
147 			p = page_alloc();
148 			pthread_mutex_lock(&mt_lock);
149 			radix_tree_insert(&mt_tree, 0, p);
150 			pthread_mutex_unlock(&mt_lock);
151 
152 			p = page_alloc();
153 			pthread_mutex_lock(&mt_lock);
154 			radix_tree_insert(&mt_tree, 1, p);
155 			pthread_mutex_unlock(&mt_lock);
156 
157 			pthread_mutex_lock(&mt_lock);
158 			p = radix_tree_delete(&mt_tree, 1);
159 			pthread_mutex_lock(&p->lock);
160 			p->count--;
161 			pthread_mutex_unlock(&p->lock);
162 			pthread_mutex_unlock(&mt_lock);
163 			page_free(p);
164 
165 			pthread_mutex_lock(&mt_lock);
166 			p = radix_tree_delete(&mt_tree, 0);
167 			pthread_mutex_lock(&p->lock);
168 			p->count--;
169 			pthread_mutex_unlock(&p->lock);
170 			pthread_mutex_unlock(&mt_lock);
171 			page_free(p);
172 		}
173 	} else {
174 		int j;
175 
176 		for (j = 0; j < 100000000; j++) {
177 			struct page *pages[10];
178 
179 			find_get_pages(0, 10, pages);
180 		}
181 	}
182 
183 	rcu_unregister_thread();
184 
185 	return NULL;
186 }
187 
188 static pthread_t *threads;
189 void regression1_test(void)
190 {
191 	int nr_threads;
192 	int i;
193 	long arg;
194 
195 	/* Regression #1 */
196 	printf("running regression test 1, should finish in under a minute\n");
197 	nr_threads = 2;
198 	pthread_barrier_init(&worker_barrier, NULL, nr_threads);
199 
200 	threads = malloc(nr_threads * sizeof(pthread_t *));
201 
202 	for (i = 0; i < nr_threads; i++) {
203 		arg = i;
204 		if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
205 			perror("pthread_create");
206 			exit(1);
207 		}
208 	}
209 
210 	for (i = 0; i < nr_threads; i++) {
211 		if (pthread_join(threads[i], NULL)) {
212 			perror("pthread_join");
213 			exit(1);
214 		}
215 	}
216 
217 	free(threads);
218 
219 	printf("regression test 1, done\n");
220 }
221