1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21366c37eSMatthew Wilcox /*
31366c37eSMatthew Wilcox * Regression1
41366c37eSMatthew Wilcox * Description:
51366c37eSMatthew Wilcox * Salman Qazi describes the following radix-tree bug:
61366c37eSMatthew Wilcox *
71366c37eSMatthew Wilcox * In the following case, we get can get a deadlock:
81366c37eSMatthew Wilcox *
91366c37eSMatthew Wilcox * 0. The radix tree contains two items, one has the index 0.
101366c37eSMatthew Wilcox * 1. The reader (in this case find_get_pages) takes the rcu_read_lock.
111366c37eSMatthew Wilcox * 2. The reader acquires slot(s) for item(s) including the index 0 item.
121366c37eSMatthew Wilcox * 3. The non-zero index item is deleted, and as a consequence the other item
131366c37eSMatthew Wilcox * is moved to the root of the tree. The place where it used to be is queued
141366c37eSMatthew Wilcox * for deletion after the readers finish.
151366c37eSMatthew Wilcox * 3b. The zero item is deleted, removing it from the direct slot, it remains in
161366c37eSMatthew Wilcox * the rcu-delayed indirect node.
171366c37eSMatthew Wilcox * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref
181366c37eSMatthew Wilcox * count
191366c37eSMatthew Wilcox * 5. The reader looks at it again, hoping that the item will either be freed
201366c37eSMatthew Wilcox * or the ref count will increase. This never happens, as the slot it is
211366c37eSMatthew Wilcox * looking at will never be updated. Also, this slot can never be reclaimed
221366c37eSMatthew Wilcox * because the reader is holding rcu_read_lock and is in an infinite loop.
231366c37eSMatthew Wilcox *
241366c37eSMatthew Wilcox * The fix is to re-use the same "indirect" pointer case that requires a slot
251366c37eSMatthew Wilcox * lookup retry into a general "retry the lookup" bit.
261366c37eSMatthew Wilcox *
271366c37eSMatthew Wilcox * Running:
281366c37eSMatthew Wilcox * This test should run to completion in a few seconds. The above bug would
291366c37eSMatthew Wilcox * cause it to hang indefinitely.
301366c37eSMatthew Wilcox *
311366c37eSMatthew Wilcox * Upstream commit:
321366c37eSMatthew Wilcox * Not yet
331366c37eSMatthew Wilcox */
341366c37eSMatthew Wilcox #include <linux/kernel.h>
351366c37eSMatthew Wilcox #include <linux/gfp.h>
361366c37eSMatthew Wilcox #include <linux/slab.h>
371366c37eSMatthew Wilcox #include <linux/radix-tree.h>
381366c37eSMatthew Wilcox #include <linux/rcupdate.h>
391366c37eSMatthew Wilcox #include <stdlib.h>
401366c37eSMatthew Wilcox #include <pthread.h>
411366c37eSMatthew Wilcox #include <stdio.h>
421366c37eSMatthew Wilcox #include <assert.h>
431366c37eSMatthew Wilcox
441366c37eSMatthew Wilcox #include "regression.h"
451366c37eSMatthew Wilcox
461366c37eSMatthew Wilcox static RADIX_TREE(mt_tree, GFP_KERNEL);
471366c37eSMatthew Wilcox
481366c37eSMatthew Wilcox struct page {
491366c37eSMatthew Wilcox pthread_mutex_t lock;
501366c37eSMatthew Wilcox struct rcu_head rcu;
511366c37eSMatthew Wilcox int count;
521366c37eSMatthew Wilcox unsigned long index;
531366c37eSMatthew Wilcox };
541366c37eSMatthew Wilcox
page_alloc(int index)55a332125fSMatthew Wilcox static struct page *page_alloc(int index)
561366c37eSMatthew Wilcox {
571366c37eSMatthew Wilcox struct page *p;
581366c37eSMatthew Wilcox p = malloc(sizeof(struct page));
591366c37eSMatthew Wilcox p->count = 1;
60a332125fSMatthew Wilcox p->index = index;
611366c37eSMatthew Wilcox pthread_mutex_init(&p->lock, NULL);
621366c37eSMatthew Wilcox
631366c37eSMatthew Wilcox return p;
641366c37eSMatthew Wilcox }
651366c37eSMatthew Wilcox
page_rcu_free(struct rcu_head * rcu)661366c37eSMatthew Wilcox static void page_rcu_free(struct rcu_head *rcu)
671366c37eSMatthew Wilcox {
681366c37eSMatthew Wilcox struct page *p = container_of(rcu, struct page, rcu);
691366c37eSMatthew Wilcox assert(!p->count);
701366c37eSMatthew Wilcox pthread_mutex_destroy(&p->lock);
711366c37eSMatthew Wilcox free(p);
721366c37eSMatthew Wilcox }
731366c37eSMatthew Wilcox
page_free(struct page * p)741366c37eSMatthew Wilcox static void page_free(struct page *p)
751366c37eSMatthew Wilcox {
761366c37eSMatthew Wilcox call_rcu(&p->rcu, page_rcu_free);
771366c37eSMatthew Wilcox }
781366c37eSMatthew Wilcox
find_get_pages(unsigned long start,unsigned int nr_pages,struct page ** pages)791366c37eSMatthew Wilcox static unsigned find_get_pages(unsigned long start,
801366c37eSMatthew Wilcox unsigned int nr_pages, struct page **pages)
811366c37eSMatthew Wilcox {
82a332125fSMatthew Wilcox XA_STATE(xas, &mt_tree, start);
83a332125fSMatthew Wilcox struct page *page;
84a332125fSMatthew Wilcox unsigned int ret = 0;
851366c37eSMatthew Wilcox
861366c37eSMatthew Wilcox rcu_read_lock();
87a332125fSMatthew Wilcox xas_for_each(&xas, page, ULONG_MAX) {
88a332125fSMatthew Wilcox if (xas_retry(&xas, page))
891366c37eSMatthew Wilcox continue;
901366c37eSMatthew Wilcox
911366c37eSMatthew Wilcox pthread_mutex_lock(&page->lock);
92a332125fSMatthew Wilcox if (!page->count)
93a332125fSMatthew Wilcox goto unlock;
94a332125fSMatthew Wilcox
951366c37eSMatthew Wilcox /* don't actually update page refcount */
961366c37eSMatthew Wilcox pthread_mutex_unlock(&page->lock);
971366c37eSMatthew Wilcox
981366c37eSMatthew Wilcox /* Has the page moved? */
99a332125fSMatthew Wilcox if (unlikely(page != xas_reload(&xas)))
100a332125fSMatthew Wilcox goto put_page;
1011366c37eSMatthew Wilcox
1021366c37eSMatthew Wilcox pages[ret] = page;
1031366c37eSMatthew Wilcox ret++;
104a332125fSMatthew Wilcox continue;
105a332125fSMatthew Wilcox unlock:
106a332125fSMatthew Wilcox pthread_mutex_unlock(&page->lock);
107a332125fSMatthew Wilcox put_page:
108a332125fSMatthew Wilcox xas_reset(&xas);
1091366c37eSMatthew Wilcox }
1101366c37eSMatthew Wilcox rcu_read_unlock();
1111366c37eSMatthew Wilcox return ret;
1121366c37eSMatthew Wilcox }
1131366c37eSMatthew Wilcox
1141366c37eSMatthew Wilcox static pthread_barrier_t worker_barrier;
1151366c37eSMatthew Wilcox
regression1_fn(void * arg)1161366c37eSMatthew Wilcox static void *regression1_fn(void *arg)
1171366c37eSMatthew Wilcox {
1181366c37eSMatthew Wilcox rcu_register_thread();
1191366c37eSMatthew Wilcox
1201366c37eSMatthew Wilcox if (pthread_barrier_wait(&worker_barrier) ==
1211366c37eSMatthew Wilcox PTHREAD_BARRIER_SERIAL_THREAD) {
1221366c37eSMatthew Wilcox int j;
1231366c37eSMatthew Wilcox
1241366c37eSMatthew Wilcox for (j = 0; j < 1000000; j++) {
1251366c37eSMatthew Wilcox struct page *p;
1261366c37eSMatthew Wilcox
127a332125fSMatthew Wilcox p = page_alloc(0);
128372266baSMatthew Wilcox xa_lock(&mt_tree);
1291366c37eSMatthew Wilcox radix_tree_insert(&mt_tree, 0, p);
130372266baSMatthew Wilcox xa_unlock(&mt_tree);
1311366c37eSMatthew Wilcox
132a332125fSMatthew Wilcox p = page_alloc(1);
133372266baSMatthew Wilcox xa_lock(&mt_tree);
1341366c37eSMatthew Wilcox radix_tree_insert(&mt_tree, 1, p);
135372266baSMatthew Wilcox xa_unlock(&mt_tree);
1361366c37eSMatthew Wilcox
137372266baSMatthew Wilcox xa_lock(&mt_tree);
1381366c37eSMatthew Wilcox p = radix_tree_delete(&mt_tree, 1);
1391366c37eSMatthew Wilcox pthread_mutex_lock(&p->lock);
1401366c37eSMatthew Wilcox p->count--;
1411366c37eSMatthew Wilcox pthread_mutex_unlock(&p->lock);
142372266baSMatthew Wilcox xa_unlock(&mt_tree);
1431366c37eSMatthew Wilcox page_free(p);
1441366c37eSMatthew Wilcox
145372266baSMatthew Wilcox xa_lock(&mt_tree);
1461366c37eSMatthew Wilcox p = radix_tree_delete(&mt_tree, 0);
1471366c37eSMatthew Wilcox pthread_mutex_lock(&p->lock);
1481366c37eSMatthew Wilcox p->count--;
1491366c37eSMatthew Wilcox pthread_mutex_unlock(&p->lock);
150372266baSMatthew Wilcox xa_unlock(&mt_tree);
1511366c37eSMatthew Wilcox page_free(p);
1521366c37eSMatthew Wilcox }
1531366c37eSMatthew Wilcox } else {
1541366c37eSMatthew Wilcox int j;
1551366c37eSMatthew Wilcox
1561366c37eSMatthew Wilcox for (j = 0; j < 100000000; j++) {
1571366c37eSMatthew Wilcox struct page *pages[10];
1581366c37eSMatthew Wilcox
1591366c37eSMatthew Wilcox find_get_pages(0, 10, pages);
1601366c37eSMatthew Wilcox }
1611366c37eSMatthew Wilcox }
1621366c37eSMatthew Wilcox
1631366c37eSMatthew Wilcox rcu_unregister_thread();
1641366c37eSMatthew Wilcox
1651366c37eSMatthew Wilcox return NULL;
1661366c37eSMatthew Wilcox }
1671366c37eSMatthew Wilcox
1681366c37eSMatthew Wilcox static pthread_t *threads;
regression1_test(void)1691366c37eSMatthew Wilcox void regression1_test(void)
1701366c37eSMatthew Wilcox {
1711366c37eSMatthew Wilcox int nr_threads;
1721366c37eSMatthew Wilcox int i;
1731366c37eSMatthew Wilcox long arg;
1741366c37eSMatthew Wilcox
1751366c37eSMatthew Wilcox /* Regression #1 */
17673bc029bSRehas Sachdeva printv(1, "running regression test 1, should finish in under a minute\n");
1771366c37eSMatthew Wilcox nr_threads = 2;
1781366c37eSMatthew Wilcox pthread_barrier_init(&worker_barrier, NULL, nr_threads);
1791366c37eSMatthew Wilcox
180*cac7ea57SColin Ian King threads = malloc(nr_threads * sizeof(*threads));
1811366c37eSMatthew Wilcox
1821366c37eSMatthew Wilcox for (i = 0; i < nr_threads; i++) {
1831366c37eSMatthew Wilcox arg = i;
1841366c37eSMatthew Wilcox if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
1851366c37eSMatthew Wilcox perror("pthread_create");
1861366c37eSMatthew Wilcox exit(1);
1871366c37eSMatthew Wilcox }
1881366c37eSMatthew Wilcox }
1891366c37eSMatthew Wilcox
1901366c37eSMatthew Wilcox for (i = 0; i < nr_threads; i++) {
1911366c37eSMatthew Wilcox if (pthread_join(threads[i], NULL)) {
1921366c37eSMatthew Wilcox perror("pthread_join");
1931366c37eSMatthew Wilcox exit(1);
1941366c37eSMatthew Wilcox }
1951366c37eSMatthew Wilcox }
1961366c37eSMatthew Wilcox
1971366c37eSMatthew Wilcox free(threads);
1981366c37eSMatthew Wilcox
19973bc029bSRehas Sachdeva printv(1, "regression test 1, done\n");
2001366c37eSMatthew Wilcox }
201