xref: /openbmc/linux/fs/btrfs/async-thread.c (revision 4a69a410)
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/version.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include "async-thread.h"

#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2

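/*
 * Life cycle of the flag bits on a struct btrfs_work, as used below:
 * WORK_QUEUED_BIT is set when the work is queued (and tested first to
 * reject double queueing) and cleared when a worker picks the item up.
 * WORK_DONE_BIT is set once work->func has run, and WORK_ORDER_DONE_BIT
 * once the ordered completion functions have been called for the item.
 */
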
/*
 * container for the kthread task pointer and the list of pending work.
 * One of these is allocated per worker thread.
 */
struct btrfs_worker_thread {
	/* pool we belong to */
	struct btrfs_workers *workers;

	/* list of struct btrfs_work that are waiting for service */
	struct list_head pending;

	/* list of worker threads from struct btrfs_workers */
	struct list_head worker_list;

	/* kthread */
	struct task_struct *task;

	/* number of things on the pending list */
	atomic_t num_pending;

	/* used by next_worker to batch submissions onto one thread */
	unsigned long sequence;

	/* protects the pending list. */
	spinlock_t lock;

	/* set to non-zero when this thread is already awake and kicking */
	int working;

	/* are we currently idle */
	int idle;
};

/*
 * helper function to move a thread onto the idle list after it
 * has finished some requests.
 */
static void check_idle_worker(struct btrfs_worker_thread *worker)
{
	if (!worker->idle && atomic_read(&worker->num_pending) <
	    worker->workers->idle_thresh / 2) {
		unsigned long flags;
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 1;
		list_move(&worker->worker_list, &worker->workers->idle_list);
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}

/*
 * helper function to move a thread off the idle list after new
 * pending work is added.
 */
static void check_busy_worker(struct btrfs_worker_thread *worker)
{
	if (worker->idle && atomic_read(&worker->num_pending) >=
	    worker->workers->idle_thresh) {
		unsigned long flags;
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 0;
		list_move_tail(&worker->worker_list,
			       &worker->workers->worker_list);
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}

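/*
 * Note the hysteresis between the two helpers above: a worker only goes
 * back onto the idle list once num_pending drops below idle_thresh / 2,
 * but leaves it as soon as num_pending reaches idle_thresh.  The gap
 * keeps a worker hovering near the threshold from bouncing between the
 * two lists on every submission.
 */

/*
 * For ordered queues, run the ordered_func/ordered_free completions for
 * every finished item at the head of order_list.  work->func may finish
 * in any order across workers, but completions are always called in
 * submission order: if item A was queued before item B, A's ordered_func
 * runs before B's, even when B's func finishes first.
 */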
static noinline int run_ordered_completions(struct btrfs_workers *workers,
					    struct btrfs_work *work)
{
	unsigned long flags;

	if (!workers->ordered)
		return 0;

	set_bit(WORK_DONE_BIT, &work->flags);

	spin_lock_irqsave(&workers->lock, flags);

	while (!list_empty(&workers->order_list)) {
		work = list_entry(workers->order_list.next,
				  struct btrfs_work, order_list);

		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

		/* we are going to call the ordered done function, but
		 * we leave the work item on the list as a barrier so
		 * that later work items that are done don't have their
		 * functions called before this one returns
		 */
		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
			break;

		spin_unlock_irqrestore(&workers->lock, flags);

		work->ordered_func(work);

		/* now take the lock again and call the freeing code */
		spin_lock_irqsave(&workers->lock, flags);
		list_del(&work->order_list);
		work->ordered_free(work);
	}

	spin_unlock_irqrestore(&workers->lock, flags);
	return 0;
}

/*
 * main loop for servicing work items
 */
static int worker_loop(void *arg)
{
	struct btrfs_worker_thread *worker = arg;
	struct list_head *cur;
	struct btrfs_work *work;
	do {
		spin_lock_irq(&worker->lock);
		while (!list_empty(&worker->pending)) {
			cur = worker->pending.next;
			work = list_entry(cur, struct btrfs_work, list);
			list_del(&work->list);
			clear_bit(WORK_QUEUED_BIT, &work->flags);

			work->worker = worker;
			spin_unlock_irq(&worker->lock);

			work->func(work);

			atomic_dec(&worker->num_pending);
			/*
			 * unless this is an ordered work queue,
			 * 'work' was probably freed by func above.
			 */
			run_ordered_completions(worker->workers, work);

			spin_lock_irq(&worker->lock);
			check_idle_worker(worker);
		}
		worker->working = 0;
		if (freezing(current)) {
			/*
			 * drop the lock before entering the refrigerator;
			 * sleeping there with a spinlock held and irqs off
			 * would deadlock against the relock at the top of
			 * the loop.
			 */
			spin_unlock_irq(&worker->lock);
			refrigerator();
		} else {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&worker->lock);
			schedule();
			__set_current_state(TASK_RUNNING);
		}
	} while (!kthread_should_stop());
	return 0;
}

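/*
 * Wakeups are not lost across the sleep above: btrfs_queue_worker adds
 * to the pending list and reads worker->working while holding
 * worker->lock, and the loop sets TASK_INTERRUPTIBLE before dropping
 * that lock, so a submission that races with the worker going to sleep
 * still moves the task back to TASK_RUNNING.
 */
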
/*
 * this will wait for all the worker threads to shut down
 */
int btrfs_stop_workers(struct btrfs_workers *workers)
{
	struct list_head *cur;
	struct btrfs_worker_thread *worker;

	list_splice_init(&workers->idle_list, &workers->worker_list);
	while (!list_empty(&workers->worker_list)) {
		cur = workers->worker_list.next;
		worker = list_entry(cur, struct btrfs_worker_thread,
				    worker_list);
		kthread_stop(worker->task);
		list_del(&worker->worker_list);
		kfree(worker);
	}
	return 0;
}

/*
 * simple init on struct btrfs_workers
 */
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
{
	workers->num_workers = 0;
	INIT_LIST_HEAD(&workers->worker_list);
	INIT_LIST_HEAD(&workers->idle_list);
	INIT_LIST_HEAD(&workers->order_list);
	spin_lock_init(&workers->lock);
	workers->max_workers = max;
	workers->idle_thresh = 32;
	workers->name = name;
	workers->ordered = 0;
}

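/*
 * Typical use of this API, sketched from the functions in this file.
 * The pool and handler names are illustrative, not taken from a real
 * caller:
 *
 *	struct btrfs_workers pool;
 *	struct btrfs_work *work;
 *
 *	btrfs_init_workers(&pool, "example", 4);
 *	pool.ordered = 1;			(if completion order matters)
 *	btrfs_start_workers(&pool, 1);
 *
 *	work->func = my_func;
 *	work->ordered_func = my_ordered_func;	(ordered queues only)
 *	work->ordered_free = my_ordered_free;
 *	work->flags = 0;
 *	btrfs_queue_worker(&pool, work);
 *	...
 *	btrfs_stop_workers(&pool);
 */
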
/*
 * starts new worker threads.  This does not enforce the max worker
 * count in case you need to temporarily go past it.
 */
int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
{
	struct btrfs_worker_thread *worker;
	int ret = 0;
	int i;

	for (i = 0; i < num_workers; i++) {
		worker = kzalloc(sizeof(*worker), GFP_NOFS);
		if (!worker) {
			ret = -ENOMEM;
			goto fail;
		}

		INIT_LIST_HEAD(&worker->pending);
		INIT_LIST_HEAD(&worker->worker_list);
		spin_lock_init(&worker->lock);
		atomic_set(&worker->num_pending, 0);
		/* set the pool pointer before the new thread can run */
		worker->workers = workers;
		worker->task = kthread_run(worker_loop, worker,
					   "btrfs-%s-%d", workers->name,
					   workers->num_workers + i);
		if (IS_ERR(worker->task)) {
			/* grab the error code before freeing the worker */
			ret = PTR_ERR(worker->task);
			kfree(worker);
			goto fail;
		}

		spin_lock_irq(&workers->lock);
		list_add_tail(&worker->worker_list, &workers->idle_list);
		worker->idle = 1;
		workers->num_workers++;
		spin_unlock_irq(&workers->lock);
	}
	return 0;
fail:
	btrfs_stop_workers(workers);
	return ret;
}

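/*
 * Note the error path above: btrfs_stop_workers also tears down any
 * workers that were successfully started before the failure, so the
 * caller gets all-or-nothing behavior from one btrfs_start_workers call.
 */
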
/*
 * run through the list and find a worker thread that doesn't have a lot
 * to do right now.  This can return NULL if we aren't yet at the thread
 * count limit and all of the threads are busy.
 */
static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	struct list_head *next;
	int enforce_min = workers->num_workers < workers->max_workers;

	/*
	 * if we find an idle thread, don't move it to the end of the
	 * idle list.  This improves the chance that the next submission
	 * will reuse the same thread, and maybe catch it while it is still
	 * working
	 */
	if (!list_empty(&workers->idle_list)) {
		next = workers->idle_list.next;
		worker = list_entry(next, struct btrfs_worker_thread,
				    worker_list);
		return worker;
	}
	if (enforce_min || list_empty(&workers->worker_list))
		return NULL;

	/*
	 * if we pick a busy task, move the task to the end of the list.
	 * hopefully this will keep things somewhat evenly balanced.
	 * Do the move in batches based on the sequence number.  This groups
	 * requests submitted at roughly the same time onto the same worker.
	 */
	next = workers->worker_list.next;
	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
	atomic_inc(&worker->num_pending);
	worker->sequence++;

	if (worker->sequence % workers->idle_thresh == 0)
		list_move_tail(next, &workers->worker_list);
	return worker;
}

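/*
 * Worked example of the batching above: with the default idle_thresh of
 * 32, the worker at the head of the busy list takes 32 consecutive
 * submissions (sequence 1..32) before sequence % 32 == 0 rotates it to
 * the tail and the next busy worker starts taking requests.
 */
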
/*
 * selects a worker thread to take the next job.  This will either find
 * an idle worker, start a new worker up to the max count, or just return
 * one of the existing busy workers.
 */
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	unsigned long flags;

again:
	spin_lock_irqsave(&workers->lock, flags);
	worker = next_worker(workers);
	spin_unlock_irqrestore(&workers->lock, flags);

	if (!worker) {
		spin_lock_irqsave(&workers->lock, flags);
		if (workers->num_workers >= workers->max_workers) {
			struct list_head *fallback = NULL;
			/*
			 * we have failed to find any workers, just
			 * fall back to the first one we can find
			 */
			if (!list_empty(&workers->worker_list))
				fallback = workers->worker_list.next;
			if (!list_empty(&workers->idle_list))
				fallback = workers->idle_list.next;
			BUG_ON(!fallback);
			worker = list_entry(fallback,
				  struct btrfs_worker_thread, worker_list);
			spin_unlock_irqrestore(&workers->lock, flags);
		} else {
			spin_unlock_irqrestore(&workers->lock, flags);
			/* we're below the limit, start another worker */
			btrfs_start_workers(workers, 1);
			goto again;
		}
	}
	return worker;
}

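/*
 * Because the lock is dropped before btrfs_start_workers runs, two
 * submitters can both see num_workers below the limit and each start a
 * worker, so max_workers is a soft cap; the comment above
 * btrfs_start_workers calls this out.
 */
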
/*
 * btrfs_requeue_work just puts the work item back on the tail of the list
 * it was taken from.  It is intended for use with long running work functions
 * that make some progress and want to give the cpu up for others.
 */
int btrfs_requeue_work(struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker = work->worker;
	unsigned long flags;

	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		goto out;

	spin_lock_irqsave(&worker->lock, flags);
	atomic_inc(&worker->num_pending);
	list_add_tail(&work->list, &worker->pending);

	/* by definition we're busy, take ourselves off the idle
	 * list
	 */
	if (worker->idle) {
		/*
		 * irqs are already off here; reusing 'flags' for a nested
		 * irqsave would clobber the state saved above, so take the
		 * inner lock with a plain spin_lock.
		 */
		spin_lock(&worker->workers->lock);
		worker->idle = 0;
		list_move_tail(&worker->worker_list,
			       &worker->workers->worker_list);
		spin_unlock(&worker->workers->lock);
	}

	spin_unlock_irqrestore(&worker->lock, flags);

out:
	return 0;
}

/*
 * places a struct btrfs_work into the pending queue of one of the kthreads
 */
int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker;
	unsigned long flags;
	int wake = 0;

	/* don't requeue something already on a list */
	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		goto out;

	worker = find_worker(workers);
	if (workers->ordered) {
		spin_lock_irqsave(&workers->lock, flags);
		list_add_tail(&work->order_list, &workers->order_list);
		spin_unlock_irqrestore(&workers->lock, flags);
	} else {
		INIT_LIST_HEAD(&work->order_list);
	}

	spin_lock_irqsave(&worker->lock, flags);
	atomic_inc(&worker->num_pending);
	check_busy_worker(worker);
	list_add_tail(&work->list, &worker->pending);

	/*
	 * avoid calling into wake_up_process if this thread has already
	 * been kicked
	 */
	if (!worker->working)
		wake = 1;
	worker->working = 1;

	spin_unlock_irqrestore(&worker->lock, flags);

	if (wake)
		wake_up_process(worker->task);
out:
	return 0;
}
419