xref: /openbmc/linux/fs/btrfs/async-thread.c (revision 5d99a998)
18b712842SChris Mason /*
28b712842SChris Mason  * Copyright (C) 2007 Oracle.  All rights reserved.
308a9ff32SQu Wenruo  * Copyright (C) 2014 Fujitsu.  All rights reserved.
48b712842SChris Mason  *
58b712842SChris Mason  * This program is free software; you can redistribute it and/or
68b712842SChris Mason  * modify it under the terms of the GNU General Public
78b712842SChris Mason  * License v2 as published by the Free Software Foundation.
88b712842SChris Mason  *
98b712842SChris Mason  * This program is distributed in the hope that it will be useful,
108b712842SChris Mason  * but WITHOUT ANY WARRANTY; without even the implied warranty of
118b712842SChris Mason  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
128b712842SChris Mason  * General Public License for more details.
138b712842SChris Mason  *
148b712842SChris Mason  * You should have received a copy of the GNU General Public
158b712842SChris Mason  * License along with this program; if not, write to the
168b712842SChris Mason  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
188b712842SChris Mason  */
198b712842SChris Mason 
208b712842SChris Mason #include <linux/kthread.h>
215a0e3ad6STejun Heo #include <linux/slab.h>
228b712842SChris Mason #include <linux/list.h>
238b712842SChris Mason #include <linux/spinlock.h>
248b712842SChris Mason #include <linux/freezer.h>
258b712842SChris Mason #include "async-thread.h"
2652483bc2SQu Wenruo #include "ctree.h"
278b712842SChris Mason 
/* Bit numbers used in btrfs_work->flags. */
enum {
	WORK_DONE_BIT		= 0,	/* set once work->func() has returned */
	WORK_ORDER_DONE_BIT	= 1,	/* set when ordered_func() is claimed */
	WORK_HIGH_PRIO_BIT	= 2,	/* work goes to the high priority queue */
};

/* Values for the thresholding logic of a workqueue. */
enum {
	NO_THRESHOLD	= -1,	/* stored in ->thresh to disable thresholding */
	DFT_THRESHOLD	= 32,	/* used when the caller passes thresh == 0 */
};
340bd9289cSQu Wenruo 
35d458b054SQu Wenruo struct __btrfs_workqueue {
3608a9ff32SQu Wenruo 	struct workqueue_struct *normal_wq;
3708a9ff32SQu Wenruo 	/* List head pointing to ordered work list */
3808a9ff32SQu Wenruo 	struct list_head ordered_list;
3908a9ff32SQu Wenruo 
4008a9ff32SQu Wenruo 	/* Spinlock for ordered_list */
4108a9ff32SQu Wenruo 	spinlock_t list_lock;
420bd9289cSQu Wenruo 
430bd9289cSQu Wenruo 	/* Thresholding related variants */
440bd9289cSQu Wenruo 	atomic_t pending;
450bd9289cSQu Wenruo 	int max_active;
460bd9289cSQu Wenruo 	int current_max;
470bd9289cSQu Wenruo 	int thresh;
480bd9289cSQu Wenruo 	unsigned int count;
490bd9289cSQu Wenruo 	spinlock_t thres_lock;
5008a9ff32SQu Wenruo };
5108a9ff32SQu Wenruo 
/*
 * Public workqueue handle: always has a normal priority queue and, when
 * WQ_HIGHPRI was requested at allocation time, a high priority one as well.
 */
struct btrfs_workqueue {
	/* Always allocated */
	struct __btrfs_workqueue *normal;
	/* NULL unless the queue was allocated with WQ_HIGHPRI */
	struct __btrfs_workqueue *high;
};
561ca08976SQu Wenruo 
579e0af237SLiu Bo static void normal_work_helper(struct btrfs_work *work);
589e0af237SLiu Bo 
599e0af237SLiu Bo #define BTRFS_WORK_HELPER(name)					\
609e0af237SLiu Bo void btrfs_##name(struct work_struct *arg)				\
619e0af237SLiu Bo {									\
629e0af237SLiu Bo 	struct btrfs_work *work = container_of(arg, struct btrfs_work,	\
639e0af237SLiu Bo 					       normal_work);		\
649e0af237SLiu Bo 	normal_work_helper(work);					\
659e0af237SLiu Bo }
669e0af237SLiu Bo 
679e0af237SLiu Bo BTRFS_WORK_HELPER(worker_helper);
689e0af237SLiu Bo BTRFS_WORK_HELPER(delalloc_helper);
699e0af237SLiu Bo BTRFS_WORK_HELPER(flush_delalloc_helper);
709e0af237SLiu Bo BTRFS_WORK_HELPER(cache_helper);
719e0af237SLiu Bo BTRFS_WORK_HELPER(submit_helper);
729e0af237SLiu Bo BTRFS_WORK_HELPER(fixup_helper);
739e0af237SLiu Bo BTRFS_WORK_HELPER(endio_helper);
749e0af237SLiu Bo BTRFS_WORK_HELPER(endio_meta_helper);
759e0af237SLiu Bo BTRFS_WORK_HELPER(endio_meta_write_helper);
769e0af237SLiu Bo BTRFS_WORK_HELPER(endio_raid56_helper);
778b110e39SMiao Xie BTRFS_WORK_HELPER(endio_repair_helper);
789e0af237SLiu Bo BTRFS_WORK_HELPER(rmw_helper);
799e0af237SLiu Bo BTRFS_WORK_HELPER(endio_write_helper);
809e0af237SLiu Bo BTRFS_WORK_HELPER(freespace_write_helper);
819e0af237SLiu Bo BTRFS_WORK_HELPER(delayed_meta_helper);
829e0af237SLiu Bo BTRFS_WORK_HELPER(readahead_helper);
839e0af237SLiu Bo BTRFS_WORK_HELPER(qgroup_rescan_helper);
849e0af237SLiu Bo BTRFS_WORK_HELPER(extent_refs_helper);
859e0af237SLiu Bo BTRFS_WORK_HELPER(scrub_helper);
869e0af237SLiu Bo BTRFS_WORK_HELPER(scrubwrc_helper);
879e0af237SLiu Bo BTRFS_WORK_HELPER(scrubnc_helper);
889e0af237SLiu Bo 
899e0af237SLiu Bo static struct __btrfs_workqueue *
909e0af237SLiu Bo __btrfs_alloc_workqueue(const char *name, int flags, int max_active,
91c3a46891SQu Wenruo 			 int thresh)
9208a9ff32SQu Wenruo {
93d458b054SQu Wenruo 	struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
9408a9ff32SQu Wenruo 
955d99a998SDavid Sterba 	if (!ret)
9608a9ff32SQu Wenruo 		return NULL;
9708a9ff32SQu Wenruo 
980bd9289cSQu Wenruo 	ret->max_active = max_active;
990bd9289cSQu Wenruo 	atomic_set(&ret->pending, 0);
1000bd9289cSQu Wenruo 	if (thresh == 0)
1010bd9289cSQu Wenruo 		thresh = DFT_THRESHOLD;
1020bd9289cSQu Wenruo 	/* For low threshold, disabling threshold is a better choice */
1030bd9289cSQu Wenruo 	if (thresh < DFT_THRESHOLD) {
1040bd9289cSQu Wenruo 		ret->current_max = max_active;
1050bd9289cSQu Wenruo 		ret->thresh = NO_THRESHOLD;
1060bd9289cSQu Wenruo 	} else {
1070bd9289cSQu Wenruo 		ret->current_max = 1;
1080bd9289cSQu Wenruo 		ret->thresh = thresh;
1090bd9289cSQu Wenruo 	}
1100bd9289cSQu Wenruo 
1111ca08976SQu Wenruo 	if (flags & WQ_HIGHPRI)
1121ca08976SQu Wenruo 		ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
1130bd9289cSQu Wenruo 						 ret->max_active,
1140bd9289cSQu Wenruo 						 "btrfs", name);
1151ca08976SQu Wenruo 	else
1161ca08976SQu Wenruo 		ret->normal_wq = alloc_workqueue("%s-%s", flags,
1170bd9289cSQu Wenruo 						 ret->max_active, "btrfs",
1180bd9289cSQu Wenruo 						 name);
1195d99a998SDavid Sterba 	if (!ret->normal_wq) {
12008a9ff32SQu Wenruo 		kfree(ret);
12108a9ff32SQu Wenruo 		return NULL;
12208a9ff32SQu Wenruo 	}
12308a9ff32SQu Wenruo 
12408a9ff32SQu Wenruo 	INIT_LIST_HEAD(&ret->ordered_list);
12508a9ff32SQu Wenruo 	spin_lock_init(&ret->list_lock);
1260bd9289cSQu Wenruo 	spin_lock_init(&ret->thres_lock);
127c3a46891SQu Wenruo 	trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
12808a9ff32SQu Wenruo 	return ret;
12908a9ff32SQu Wenruo }
13008a9ff32SQu Wenruo 
1311ca08976SQu Wenruo static inline void
132d458b054SQu Wenruo __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
1331ca08976SQu Wenruo 
134c3a46891SQu Wenruo struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
1351ca08976SQu Wenruo 					      int flags,
1360bd9289cSQu Wenruo 					      int max_active,
1370bd9289cSQu Wenruo 					      int thresh)
1381ca08976SQu Wenruo {
139d458b054SQu Wenruo 	struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
1401ca08976SQu Wenruo 
1415d99a998SDavid Sterba 	if (!ret)
1421ca08976SQu Wenruo 		return NULL;
1431ca08976SQu Wenruo 
1441ca08976SQu Wenruo 	ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
1450bd9289cSQu Wenruo 					      max_active, thresh);
1465d99a998SDavid Sterba 	if (!ret->normal) {
1471ca08976SQu Wenruo 		kfree(ret);
1481ca08976SQu Wenruo 		return NULL;
1491ca08976SQu Wenruo 	}
1501ca08976SQu Wenruo 
1511ca08976SQu Wenruo 	if (flags & WQ_HIGHPRI) {
1520bd9289cSQu Wenruo 		ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
1530bd9289cSQu Wenruo 						    thresh);
1545d99a998SDavid Sterba 		if (!ret->high) {
1551ca08976SQu Wenruo 			__btrfs_destroy_workqueue(ret->normal);
1561ca08976SQu Wenruo 			kfree(ret);
1571ca08976SQu Wenruo 			return NULL;
1581ca08976SQu Wenruo 		}
1591ca08976SQu Wenruo 	}
1601ca08976SQu Wenruo 	return ret;
1611ca08976SQu Wenruo }
1621ca08976SQu Wenruo 
1630bd9289cSQu Wenruo /*
1640bd9289cSQu Wenruo  * Hook for threshold which will be called in btrfs_queue_work.
1650bd9289cSQu Wenruo  * This hook WILL be called in IRQ handler context,
1660bd9289cSQu Wenruo  * so workqueue_set_max_active MUST NOT be called in this hook
1670bd9289cSQu Wenruo  */
168d458b054SQu Wenruo static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
1690bd9289cSQu Wenruo {
1700bd9289cSQu Wenruo 	if (wq->thresh == NO_THRESHOLD)
1710bd9289cSQu Wenruo 		return;
1720bd9289cSQu Wenruo 	atomic_inc(&wq->pending);
1730bd9289cSQu Wenruo }
1740bd9289cSQu Wenruo 
1750bd9289cSQu Wenruo /*
1760bd9289cSQu Wenruo  * Hook for threshold which will be called before executing the work,
1770bd9289cSQu Wenruo  * This hook is called in kthread content.
1780bd9289cSQu Wenruo  * So workqueue_set_max_active is called here.
1790bd9289cSQu Wenruo  */
180d458b054SQu Wenruo static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
1810bd9289cSQu Wenruo {
1820bd9289cSQu Wenruo 	int new_max_active;
1830bd9289cSQu Wenruo 	long pending;
1840bd9289cSQu Wenruo 	int need_change = 0;
1850bd9289cSQu Wenruo 
1860bd9289cSQu Wenruo 	if (wq->thresh == NO_THRESHOLD)
1870bd9289cSQu Wenruo 		return;
1880bd9289cSQu Wenruo 
1890bd9289cSQu Wenruo 	atomic_dec(&wq->pending);
1900bd9289cSQu Wenruo 	spin_lock(&wq->thres_lock);
1910bd9289cSQu Wenruo 	/*
1920bd9289cSQu Wenruo 	 * Use wq->count to limit the calling frequency of
1930bd9289cSQu Wenruo 	 * workqueue_set_max_active.
1940bd9289cSQu Wenruo 	 */
1950bd9289cSQu Wenruo 	wq->count++;
1960bd9289cSQu Wenruo 	wq->count %= (wq->thresh / 4);
1970bd9289cSQu Wenruo 	if (!wq->count)
1980bd9289cSQu Wenruo 		goto  out;
1990bd9289cSQu Wenruo 	new_max_active = wq->current_max;
2000bd9289cSQu Wenruo 
2010bd9289cSQu Wenruo 	/*
2020bd9289cSQu Wenruo 	 * pending may be changed later, but it's OK since we really
2030bd9289cSQu Wenruo 	 * don't need it so accurate to calculate new_max_active.
2040bd9289cSQu Wenruo 	 */
2050bd9289cSQu Wenruo 	pending = atomic_read(&wq->pending);
2060bd9289cSQu Wenruo 	if (pending > wq->thresh)
2070bd9289cSQu Wenruo 		new_max_active++;
2080bd9289cSQu Wenruo 	if (pending < wq->thresh / 2)
2090bd9289cSQu Wenruo 		new_max_active--;
2100bd9289cSQu Wenruo 	new_max_active = clamp_val(new_max_active, 1, wq->max_active);
2110bd9289cSQu Wenruo 	if (new_max_active != wq->current_max)  {
2120bd9289cSQu Wenruo 		need_change = 1;
2130bd9289cSQu Wenruo 		wq->current_max = new_max_active;
2140bd9289cSQu Wenruo 	}
2150bd9289cSQu Wenruo out:
2160bd9289cSQu Wenruo 	spin_unlock(&wq->thres_lock);
2170bd9289cSQu Wenruo 
2180bd9289cSQu Wenruo 	if (need_change) {
2190bd9289cSQu Wenruo 		workqueue_set_max_active(wq->normal_wq, wq->current_max);
2200bd9289cSQu Wenruo 	}
2210bd9289cSQu Wenruo }
2220bd9289cSQu Wenruo 
223d458b054SQu Wenruo static void run_ordered_work(struct __btrfs_workqueue *wq)
22408a9ff32SQu Wenruo {
22508a9ff32SQu Wenruo 	struct list_head *list = &wq->ordered_list;
226d458b054SQu Wenruo 	struct btrfs_work *work;
22708a9ff32SQu Wenruo 	spinlock_t *lock = &wq->list_lock;
22808a9ff32SQu Wenruo 	unsigned long flags;
22908a9ff32SQu Wenruo 
23008a9ff32SQu Wenruo 	while (1) {
23108a9ff32SQu Wenruo 		spin_lock_irqsave(lock, flags);
23208a9ff32SQu Wenruo 		if (list_empty(list))
23308a9ff32SQu Wenruo 			break;
234d458b054SQu Wenruo 		work = list_entry(list->next, struct btrfs_work,
23508a9ff32SQu Wenruo 				  ordered_list);
23608a9ff32SQu Wenruo 		if (!test_bit(WORK_DONE_BIT, &work->flags))
23708a9ff32SQu Wenruo 			break;
23808a9ff32SQu Wenruo 
23908a9ff32SQu Wenruo 		/*
24008a9ff32SQu Wenruo 		 * we are going to call the ordered done function, but
24108a9ff32SQu Wenruo 		 * we leave the work item on the list as a barrier so
24208a9ff32SQu Wenruo 		 * that later work items that are done don't have their
24308a9ff32SQu Wenruo 		 * functions called before this one returns
24408a9ff32SQu Wenruo 		 */
24508a9ff32SQu Wenruo 		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
24608a9ff32SQu Wenruo 			break;
24752483bc2SQu Wenruo 		trace_btrfs_ordered_sched(work);
24808a9ff32SQu Wenruo 		spin_unlock_irqrestore(lock, flags);
24908a9ff32SQu Wenruo 		work->ordered_func(work);
25008a9ff32SQu Wenruo 
25108a9ff32SQu Wenruo 		/* now take the lock again and drop our item from the list */
25208a9ff32SQu Wenruo 		spin_lock_irqsave(lock, flags);
25308a9ff32SQu Wenruo 		list_del(&work->ordered_list);
25408a9ff32SQu Wenruo 		spin_unlock_irqrestore(lock, flags);
25508a9ff32SQu Wenruo 
25608a9ff32SQu Wenruo 		/*
25708a9ff32SQu Wenruo 		 * we don't want to call the ordered free functions
25808a9ff32SQu Wenruo 		 * with the lock held though
25908a9ff32SQu Wenruo 		 */
26008a9ff32SQu Wenruo 		work->ordered_free(work);
26152483bc2SQu Wenruo 		trace_btrfs_all_work_done(work);
26208a9ff32SQu Wenruo 	}
26308a9ff32SQu Wenruo 	spin_unlock_irqrestore(lock, flags);
26408a9ff32SQu Wenruo }
26508a9ff32SQu Wenruo 
2669e0af237SLiu Bo static void normal_work_helper(struct btrfs_work *work)
26708a9ff32SQu Wenruo {
268d458b054SQu Wenruo 	struct __btrfs_workqueue *wq;
26908a9ff32SQu Wenruo 	int need_order = 0;
27008a9ff32SQu Wenruo 
27108a9ff32SQu Wenruo 	/*
27208a9ff32SQu Wenruo 	 * We should not touch things inside work in the following cases:
27308a9ff32SQu Wenruo 	 * 1) after work->func() if it has no ordered_free
27408a9ff32SQu Wenruo 	 *    Since the struct is freed in work->func().
27508a9ff32SQu Wenruo 	 * 2) after setting WORK_DONE_BIT
27608a9ff32SQu Wenruo 	 *    The work may be freed in other threads almost instantly.
27708a9ff32SQu Wenruo 	 * So we save the needed things here.
27808a9ff32SQu Wenruo 	 */
27908a9ff32SQu Wenruo 	if (work->ordered_func)
28008a9ff32SQu Wenruo 		need_order = 1;
28108a9ff32SQu Wenruo 	wq = work->wq;
28208a9ff32SQu Wenruo 
28352483bc2SQu Wenruo 	trace_btrfs_work_sched(work);
2840bd9289cSQu Wenruo 	thresh_exec_hook(wq);
28508a9ff32SQu Wenruo 	work->func(work);
28608a9ff32SQu Wenruo 	if (need_order) {
28708a9ff32SQu Wenruo 		set_bit(WORK_DONE_BIT, &work->flags);
28808a9ff32SQu Wenruo 		run_ordered_work(wq);
28908a9ff32SQu Wenruo 	}
29052483bc2SQu Wenruo 	if (!need_order)
29152483bc2SQu Wenruo 		trace_btrfs_all_work_done(work);
29208a9ff32SQu Wenruo }
29308a9ff32SQu Wenruo 
2949e0af237SLiu Bo void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
2956db8914fSQu Wenruo 		     btrfs_func_t func,
2966db8914fSQu Wenruo 		     btrfs_func_t ordered_func,
2976db8914fSQu Wenruo 		     btrfs_func_t ordered_free)
29808a9ff32SQu Wenruo {
29908a9ff32SQu Wenruo 	work->func = func;
30008a9ff32SQu Wenruo 	work->ordered_func = ordered_func;
30108a9ff32SQu Wenruo 	work->ordered_free = ordered_free;
3029e0af237SLiu Bo 	INIT_WORK(&work->normal_work, uniq_func);
30308a9ff32SQu Wenruo 	INIT_LIST_HEAD(&work->ordered_list);
30408a9ff32SQu Wenruo 	work->flags = 0;
30508a9ff32SQu Wenruo }
30608a9ff32SQu Wenruo 
307d458b054SQu Wenruo static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
308d458b054SQu Wenruo 				      struct btrfs_work *work)
30908a9ff32SQu Wenruo {
31008a9ff32SQu Wenruo 	unsigned long flags;
31108a9ff32SQu Wenruo 
31208a9ff32SQu Wenruo 	work->wq = wq;
3130bd9289cSQu Wenruo 	thresh_queue_hook(wq);
31408a9ff32SQu Wenruo 	if (work->ordered_func) {
31508a9ff32SQu Wenruo 		spin_lock_irqsave(&wq->list_lock, flags);
31608a9ff32SQu Wenruo 		list_add_tail(&work->ordered_list, &wq->ordered_list);
31708a9ff32SQu Wenruo 		spin_unlock_irqrestore(&wq->list_lock, flags);
31808a9ff32SQu Wenruo 	}
31908a9ff32SQu Wenruo 	queue_work(wq->normal_wq, &work->normal_work);
32052483bc2SQu Wenruo 	trace_btrfs_work_queued(work);
32108a9ff32SQu Wenruo }
32208a9ff32SQu Wenruo 
323d458b054SQu Wenruo void btrfs_queue_work(struct btrfs_workqueue *wq,
324d458b054SQu Wenruo 		      struct btrfs_work *work)
3251ca08976SQu Wenruo {
326d458b054SQu Wenruo 	struct __btrfs_workqueue *dest_wq;
3271ca08976SQu Wenruo 
3281ca08976SQu Wenruo 	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
3291ca08976SQu Wenruo 		dest_wq = wq->high;
3301ca08976SQu Wenruo 	else
3311ca08976SQu Wenruo 		dest_wq = wq->normal;
3321ca08976SQu Wenruo 	__btrfs_queue_work(dest_wq, work);
3331ca08976SQu Wenruo }
3341ca08976SQu Wenruo 
3351ca08976SQu Wenruo static inline void
336d458b054SQu Wenruo __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq)
33708a9ff32SQu Wenruo {
33808a9ff32SQu Wenruo 	destroy_workqueue(wq->normal_wq);
339c3a46891SQu Wenruo 	trace_btrfs_workqueue_destroy(wq);
34008a9ff32SQu Wenruo 	kfree(wq);
34108a9ff32SQu Wenruo }
34208a9ff32SQu Wenruo 
343d458b054SQu Wenruo void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
3441ca08976SQu Wenruo {
3451ca08976SQu Wenruo 	if (!wq)
3461ca08976SQu Wenruo 		return;
3471ca08976SQu Wenruo 	if (wq->high)
3481ca08976SQu Wenruo 		__btrfs_destroy_workqueue(wq->high);
3491ca08976SQu Wenruo 	__btrfs_destroy_workqueue(wq->normal);
350ef66af10SFilipe Manana 	kfree(wq);
3511ca08976SQu Wenruo }
3521ca08976SQu Wenruo 
353d458b054SQu Wenruo void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
35408a9ff32SQu Wenruo {
355800ee224SSergei Trofimovich 	if (!wq)
356800ee224SSergei Trofimovich 		return;
3570bd9289cSQu Wenruo 	wq->normal->max_active = max;
3581ca08976SQu Wenruo 	if (wq->high)
3590bd9289cSQu Wenruo 		wq->high->max_active = max;
3601ca08976SQu Wenruo }
3611ca08976SQu Wenruo 
362d458b054SQu Wenruo void btrfs_set_work_high_priority(struct btrfs_work *work)
3631ca08976SQu Wenruo {
3641ca08976SQu Wenruo 	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
36508a9ff32SQu Wenruo }
366