/* xref: /openbmc/linux/fs/btrfs/async-thread.c (revision 0bd9289c) */
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 * Copyright (C) 2014 Fujitsu.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include "async-thread.h"

#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
#define WORK_HIGH_PRIO_BIT 3

#define NO_THRESHOLD (-1)
#define DFT_THRESHOLD (32)

/*
 * container for the kthread task pointer and the list of pending work.
 * One of these is allocated per thread.
 */
struct btrfs_worker_thread {
	/* pool we belong to */
	struct btrfs_workers *workers;

	/* list of struct btrfs_work that are waiting for service */
	struct list_head pending;
	struct list_head prio_pending;

	/* list of worker threads from struct btrfs_workers */
	struct list_head worker_list;

	/* kthread */
	struct task_struct *task;

	/* number of things on the pending list */
	atomic_t num_pending;

	/* reference counter for this struct */
	atomic_t refs;

	unsigned long sequence;

	/* protects the pending list. */
	spinlock_t lock;

	/* set to non-zero when this thread is already awake and kicking */
	int working;

	/* are we currently idle */
	int idle;
};

static int __btrfs_start_workers(struct btrfs_workers *workers);

/*
 * btrfs_start_workers uses kthread_run, which can block waiting for memory
 * for a very long time.  It will actually throttle on page writeback,
 * and so it may not make progress until after our btrfs worker threads
 * process all of the pending work structs in their queues.
 *
 * This means we can't use btrfs_start_workers from inside a btrfs worker
 * thread that is used as part of cleaning dirty memory, which pretty much
 * involves all of the worker threads.
 *
 * Instead we have a helper queue that never has more than one thread,
 * where we schedule thread start operations.  This worker_start struct
 * is used to contain the work and hold a pointer to the queue that needs
 * another worker.
 */
struct worker_start {
	struct btrfs_work work;
	struct btrfs_workers *queue;
};

static void start_new_worker_func(struct btrfs_work *work)
{
	struct worker_start *start;

	start = container_of(work, struct worker_start, work);
	__btrfs_start_workers(start->queue);
	kfree(start);
}
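
/*
 * A summary of the deferred-start path built from the pieces above and
 * below (no new behaviour, just the flow): find_worker() sets
 * workers->atomic_start_pending when the pool is full of busy threads,
 * a running worker notices the flag in check_pending_worker_creates()
 * and queues a worker_start item on the dedicated helper queue, and the
 * helper thread finally calls __btrfs_start_workers(), where
 * kthread_create() can block safely.
 */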

/*
 * helper function to move a thread onto the idle list after it
 * has finished some requests.
 */
static void check_idle_worker(struct btrfs_worker_thread *worker)
{
	if (!worker->idle && atomic_read(&worker->num_pending) <
	    worker->workers->idle_thresh / 2) {
		unsigned long flags;

		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 1;

		/* the list may be empty if the worker is just starting */
		if (!list_empty(&worker->worker_list) &&
		    !worker->workers->stopping) {
			list_move(&worker->worker_list,
				 &worker->workers->idle_list);
		}
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}

/*
 * helper function to move a thread off the idle list after new
 * pending work is added.
 */
static void check_busy_worker(struct btrfs_worker_thread *worker)
{
	if (worker->idle && atomic_read(&worker->num_pending) >=
	    worker->workers->idle_thresh) {
		unsigned long flags;

		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 0;

		if (!list_empty(&worker->worker_list) &&
		    !worker->workers->stopping) {
			list_move_tail(&worker->worker_list,
				      &worker->workers->worker_list);
		}
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}
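
/*
 * Note the hysteresis between the two helpers above: a worker goes onto
 * the idle list only once its backlog drops below idle_thresh / 2, but
 * comes back off it as soon as the backlog reaches idle_thresh.  With
 * the default idle_thresh of 32, for example, a worker turns idle below
 * 16 pending items and busy again at 32, so it doesn't bounce between
 * the two lists on every submission.
 */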

static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
	struct btrfs_workers *workers = worker->workers;
	struct worker_start *start;
	unsigned long flags;

	rmb();
	if (!workers->atomic_start_pending)
		return;

	start = kzalloc(sizeof(*start), GFP_NOFS);
	if (!start)
		return;

	start->work.func = start_new_worker_func;
	start->queue = workers;

	spin_lock_irqsave(&workers->lock, flags);
	if (!workers->atomic_start_pending)
		goto out;

	workers->atomic_start_pending = 0;
	if (workers->num_workers + workers->num_workers_starting >=
	    workers->max_workers)
		goto out;

	workers->num_workers_starting += 1;
	spin_unlock_irqrestore(&workers->lock, flags);
	btrfs_queue_worker(workers->atomic_worker_start, &start->work);
	return;

out:
	kfree(start);
	spin_unlock_irqrestore(&workers->lock, flags);
}

static noinline void run_ordered_completions(struct btrfs_workers *workers,
					    struct btrfs_work *work)
{
	if (!workers->ordered)
		return;

	set_bit(WORK_DONE_BIT, &work->flags);

	spin_lock(&workers->order_lock);

	while (1) {
		if (!list_empty(&workers->prio_order_list)) {
			work = list_entry(workers->prio_order_list.next,
					  struct btrfs_work, order_list);
		} else if (!list_empty(&workers->order_list)) {
			work = list_entry(workers->order_list.next,
					  struct btrfs_work, order_list);
		} else {
			break;
		}
		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

		/* we are going to call the ordered done function, but
		 * we leave the work item on the list as a barrier so
		 * that later work items that are done don't have their
		 * functions called before this one returns
		 */
		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
			break;

		spin_unlock(&workers->order_lock);

		work->ordered_func(work);

		/* now take the lock again and drop our item from the list */
		spin_lock(&workers->order_lock);
		list_del(&work->order_list);
		spin_unlock(&workers->order_lock);

		/*
		 * we don't want to call the ordered free functions
		 * with the lock held though
		 */
		work->ordered_free(work);
		spin_lock(&workers->order_lock);
	}

	spin_unlock(&workers->order_lock);
}

static void put_worker(struct btrfs_worker_thread *worker)
{
	if (atomic_dec_and_test(&worker->refs))
		kfree(worker);
}

static int try_worker_shutdown(struct btrfs_worker_thread *worker)
{
	int freeit = 0;

	spin_lock_irq(&worker->lock);
	spin_lock(&worker->workers->lock);
	if (worker->workers->num_workers > 1 &&
	    worker->idle &&
	    !worker->working &&
	    !list_empty(&worker->worker_list) &&
	    list_empty(&worker->prio_pending) &&
	    list_empty(&worker->pending) &&
	    atomic_read(&worker->num_pending) == 0) {
		freeit = 1;
		list_del_init(&worker->worker_list);
		worker->workers->num_workers--;
	}
	spin_unlock(&worker->workers->lock);
	spin_unlock_irq(&worker->lock);

	if (freeit)
		put_worker(worker);
	return freeit;
}

static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
					struct list_head *prio_head,
					struct list_head *head)
{
	struct btrfs_work *work = NULL;
	struct list_head *cur = NULL;

	if (!list_empty(prio_head)) {
		cur = prio_head->next;
		goto out;
	}

	smp_mb();
	if (!list_empty(&worker->prio_pending))
		goto refill;

	if (!list_empty(head)) {
		cur = head->next;
		goto out;
	}

refill:
	spin_lock_irq(&worker->lock);
	list_splice_tail_init(&worker->prio_pending, prio_head);
	list_splice_tail_init(&worker->pending, head);

	if (!list_empty(prio_head))
		cur = prio_head->next;
	else if (!list_empty(head))
		cur = head->next;
	spin_unlock_irq(&worker->lock);

	if (!cur)
		goto out_fail;

out:
	work = list_entry(cur, struct btrfs_work, list);

out_fail:
	return work;
}
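
/*
 * get_next_work() above scans the worker-local prio_head/head lists
 * without taking worker->lock; it only takes the lock to splice the
 * shared pending lists over in one batch once the local ones run dry
 * (or when the smp_mb() lets it observe newly queued high priority
 * work in worker->prio_pending before falling back to the normal list).
 */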

/*
 * main loop for servicing work items
 */
static int worker_loop(void *arg)
{
	struct btrfs_worker_thread *worker = arg;
	struct list_head head;
	struct list_head prio_head;
	struct btrfs_work *work;

	INIT_LIST_HEAD(&head);
	INIT_LIST_HEAD(&prio_head);

	do {
again:
		while (1) {
			work = get_next_work(worker, &prio_head, &head);
			if (!work)
				break;

			list_del(&work->list);
			clear_bit(WORK_QUEUED_BIT, &work->flags);

			work->worker = worker;

			work->func(work);

			atomic_dec(&worker->num_pending);
			/*
			 * unless this is an ordered work queue,
			 * 'work' was probably freed by func above.
			 */
			run_ordered_completions(worker->workers, work);

			check_pending_worker_creates(worker);
			cond_resched();
		}

		spin_lock_irq(&worker->lock);
		check_idle_worker(worker);

		if (freezing(current)) {
			worker->working = 0;
			spin_unlock_irq(&worker->lock);
			try_to_freeze();
		} else {
			spin_unlock_irq(&worker->lock);
			if (!kthread_should_stop()) {
				cpu_relax();
				/*
				 * we've dropped the lock, did someone else
				 * jump in?
				 */
				smp_mb();
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending))
					continue;

				/*
				 * this short schedule allows more work to
				 * come in without the queue functions
				 * needing to go through wake_up_process()
				 *
				 * worker->working is still 1, so nobody
				 * is going to try and wake us up
				 */
				schedule_timeout(1);
				smp_mb();
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending))
					continue;

				if (kthread_should_stop())
					break;

				/* still no more work? sleep for real */
				spin_lock_irq(&worker->lock);
				set_current_state(TASK_INTERRUPTIBLE);
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending)) {
					spin_unlock_irq(&worker->lock);
					set_current_state(TASK_RUNNING);
					goto again;
				}

				/*
				 * this makes sure we get a wakeup when someone
				 * adds something new to the queue
				 */
				worker->working = 0;
				spin_unlock_irq(&worker->lock);

				if (!kthread_should_stop()) {
					schedule_timeout(HZ * 120);
					if (!worker->working &&
					    try_worker_shutdown(worker)) {
						return 0;
					}
				}
			}
			__set_current_state(TASK_RUNNING);
		}
	} while (!kthread_should_stop());
	return 0;
}
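
/*
 * The sleep sequence at the bottom of worker_loop() is deliberately
 * staged: first a cpu_relax() recheck, then a one jiffy
 * schedule_timeout() with worker->working still set (so queuers skip
 * wake_up_process()), and only then a real TASK_INTERRUPTIBLE sleep
 * with worker->working cleared, which is what makes the queue functions
 * issue a wakeup again.  The HZ * 120 timeout gives an idle worker a
 * chance to retire itself via try_worker_shutdown().
 */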

/*
 * this will wait for all the worker threads to shut down
 */
void btrfs_stop_workers(struct btrfs_workers *workers)
{
	struct list_head *cur;
	struct btrfs_worker_thread *worker;
	int can_stop;

	spin_lock_irq(&workers->lock);
	workers->stopping = 1;
	list_splice_init(&workers->idle_list, &workers->worker_list);
	while (!list_empty(&workers->worker_list)) {
		cur = workers->worker_list.next;
		worker = list_entry(cur, struct btrfs_worker_thread,
				    worker_list);

		atomic_inc(&worker->refs);
		workers->num_workers -= 1;
		if (!list_empty(&worker->worker_list)) {
			list_del_init(&worker->worker_list);
			put_worker(worker);
			can_stop = 1;
		} else
			can_stop = 0;
		spin_unlock_irq(&workers->lock);
		if (can_stop)
			kthread_stop(worker->task);
		spin_lock_irq(&workers->lock);
		put_worker(worker);
	}
	spin_unlock_irq(&workers->lock);
}

/*
 * simple init on struct btrfs_workers
 */
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
			struct btrfs_workers *async_helper)
{
	workers->num_workers = 0;
	workers->num_workers_starting = 0;
	INIT_LIST_HEAD(&workers->worker_list);
	INIT_LIST_HEAD(&workers->idle_list);
	INIT_LIST_HEAD(&workers->order_list);
	INIT_LIST_HEAD(&workers->prio_order_list);
	spin_lock_init(&workers->lock);
	spin_lock_init(&workers->order_lock);
	workers->max_workers = max;
	workers->idle_thresh = 32;
	workers->name = name;
	workers->ordered = 0;
	workers->atomic_start_pending = 0;
	workers->atomic_worker_start = async_helper;
	workers->stopping = 0;
}

/*
 * starts new worker threads.  This does not enforce the max worker
 * count in case you need to temporarily go past it.
 */
static int __btrfs_start_workers(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	int ret = 0;

	worker = kzalloc(sizeof(*worker), GFP_NOFS);
	if (!worker) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&worker->pending);
	INIT_LIST_HEAD(&worker->prio_pending);
	INIT_LIST_HEAD(&worker->worker_list);
	spin_lock_init(&worker->lock);

	atomic_set(&worker->num_pending, 0);
	atomic_set(&worker->refs, 1);
	worker->workers = workers;
	worker->task = kthread_create(worker_loop, worker,
				      "btrfs-%s-%d", workers->name,
				      workers->num_workers + 1);
	if (IS_ERR(worker->task)) {
		ret = PTR_ERR(worker->task);
		goto fail;
	}

	spin_lock_irq(&workers->lock);
	if (workers->stopping) {
		spin_unlock_irq(&workers->lock);
		ret = -EINVAL;
		goto fail_kthread;
	}
	list_add_tail(&worker->worker_list, &workers->idle_list);
	worker->idle = 1;
	workers->num_workers++;
	workers->num_workers_starting--;
	WARN_ON(workers->num_workers_starting < 0);
	spin_unlock_irq(&workers->lock);

	wake_up_process(worker->task);
	return 0;

fail_kthread:
	kthread_stop(worker->task);
fail:
	kfree(worker);
	spin_lock_irq(&workers->lock);
	workers->num_workers_starting--;
	spin_unlock_irq(&workers->lock);
	return ret;
}

int btrfs_start_workers(struct btrfs_workers *workers)
{
	spin_lock_irq(&workers->lock);
	workers->num_workers_starting++;
	spin_unlock_irq(&workers->lock);
	return __btrfs_start_workers(workers);
}
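
/*
 * Illustrative only (not part of the original file): a minimal sketch
 * of bringing up a pool with the API above.  The pool name "example",
 * the size of 4 and the idle_thresh tweak are hypothetical; "helper"
 * stands for an already started pool used for deferred worker starts.
 *
 *	static int example_setup(struct btrfs_workers *pool,
 *				 struct btrfs_workers *helper)
 *	{
 *		btrfs_init_workers(pool, "example", 4, helper);
 *		pool->idle_thresh = 16;
 *		return btrfs_start_workers(pool);
 *	}
 */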

/*
 * run through the list and find a worker thread that doesn't have a lot
 * to do right now.  This can return NULL if we aren't yet at the thread
 * count limit and all of the threads are busy.
 */
static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	struct list_head *next;
	int enforce_min;

	enforce_min = (workers->num_workers + workers->num_workers_starting) <
		workers->max_workers;

	/*
	 * if we find an idle thread, don't move it to the end of the
	 * idle list.  This improves the chance that the next submission
	 * will reuse the same thread, and maybe catch it while it is still
	 * working
	 */
	if (!list_empty(&workers->idle_list)) {
		next = workers->idle_list.next;
		worker = list_entry(next, struct btrfs_worker_thread,
				    worker_list);
		return worker;
	}
	if (enforce_min || list_empty(&workers->worker_list))
		return NULL;

	/*
	 * if we pick a busy task, move the task to the end of the list.
	 * hopefully this will keep things somewhat evenly balanced.
	 * Do the move in batches based on the sequence number.  This groups
	 * requests submitted at roughly the same time onto the same worker.
	 */
	next = workers->worker_list.next;
	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
	worker->sequence++;

	if (worker->sequence % workers->idle_thresh == 0)
		list_move_tail(next, &workers->worker_list);
	return worker;
}

/*
 * selects a worker thread to take the next job.  This will either find
 * an idle worker, start a new worker up to the max count, or just return
 * one of the existing busy workers.
 */
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	unsigned long flags;
	struct list_head *fallback;
	int ret;

	spin_lock_irqsave(&workers->lock, flags);
again:
	worker = next_worker(workers);

	if (!worker) {
		if (workers->num_workers + workers->num_workers_starting >=
		    workers->max_workers) {
			goto fallback;
		} else if (workers->atomic_worker_start) {
			workers->atomic_start_pending = 1;
			goto fallback;
		} else {
			workers->num_workers_starting++;
			spin_unlock_irqrestore(&workers->lock, flags);
			/* we're below the limit, start another worker */
			ret = __btrfs_start_workers(workers);
			spin_lock_irqsave(&workers->lock, flags);
			if (ret)
				goto fallback;
			goto again;
		}
	}
	goto found;

fallback:
	fallback = NULL;
	/*
	 * we have failed to find any workers, just
	 * return the first one we can find.
	 */
	if (!list_empty(&workers->worker_list))
		fallback = workers->worker_list.next;
	if (!list_empty(&workers->idle_list))
		fallback = workers->idle_list.next;
	BUG_ON(!fallback);
	worker = list_entry(fallback,
		  struct btrfs_worker_thread, worker_list);
found:
	/*
	 * this makes sure the worker doesn't exit before it is placed
	 * onto a busy/idle list
	 */
	atomic_inc(&worker->num_pending);
	spin_unlock_irqrestore(&workers->lock, flags);
	return worker;
}

/*
 * btrfs_requeue_work just puts the work item back on the tail of the list
 * it was taken from.  It is intended for use with long running work functions
 * that make some progress and want to give the cpu up for others.
 */
void btrfs_requeue_work(struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker = work->worker;
	unsigned long flags;
	int wake = 0;

	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		return;

	spin_lock_irqsave(&worker->lock, flags);
	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
		list_add_tail(&work->list, &worker->prio_pending);
	else
		list_add_tail(&work->list, &worker->pending);
	atomic_inc(&worker->num_pending);

	/* by definition we're busy, take ourselves off the idle
	 * list
	 */
	if (worker->idle) {
		spin_lock(&worker->workers->lock);
		worker->idle = 0;
		list_move_tail(&worker->worker_list,
			      &worker->workers->worker_list);
		spin_unlock(&worker->workers->lock);
	}
	if (!worker->working) {
		wake = 1;
		worker->working = 1;
	}

	if (wake)
		wake_up_process(worker->task);
	spin_unlock_irqrestore(&worker->lock, flags);
}
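
/*
 * Illustrative only: the self-requeueing pattern the comment above
 * describes, with hypothetical helpers example_do_batch() and
 * example_finish().  The function processes a bounded batch and, if
 * more remains, puts itself back on the tail of its queue so other
 * items get a turn.
 *
 *	static void example_long_fn(struct btrfs_work *work)
 *	{
 *		if (!example_do_batch(work)) {
 *			btrfs_requeue_work(work);
 *			return;
 *		}
 *		example_finish(work);
 *	}
 */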

void btrfs_set_work_high_prio(struct btrfs_work *work)
{
	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}

/*
 * places a struct btrfs_work into the pending queue of one of the kthreads
 */
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker;
	unsigned long flags;
	int wake = 0;

	/* don't requeue something already on a list */
	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		return;

	worker = find_worker(workers);
	if (workers->ordered) {
		/*
		 * you're not allowed to do ordered queues from an
		 * interrupt handler
		 */
		spin_lock(&workers->order_lock);
		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
			list_add_tail(&work->order_list,
				      &workers->prio_order_list);
		} else {
			list_add_tail(&work->order_list, &workers->order_list);
		}
		spin_unlock(&workers->order_lock);
	} else {
		INIT_LIST_HEAD(&work->order_list);
	}

	spin_lock_irqsave(&worker->lock, flags);

	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
		list_add_tail(&work->list, &worker->prio_pending);
	else
		list_add_tail(&work->list, &worker->pending);
	check_busy_worker(worker);

	/*
	 * avoid calling into wake_up_process if this thread has already
	 * been kicked
	 */
	if (!worker->working)
		wake = 1;
	worker->working = 1;

	if (wake)
		wake_up_process(worker->task);
	spin_unlock_irqrestore(&worker->lock, flags);
}
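
/*
 * Illustrative only: submitting a work item to a pool with the old-style
 * API above.  example_fn(), example_done(), example_free(), "workers" and
 * "urgent" are hypothetical; the ordered callbacks only run on pools with
 * workers->ordered set.
 *
 *	work->func = example_fn;
 *	work->ordered_func = example_done;
 *	work->ordered_free = example_free;
 *	work->flags = 0;
 *	if (urgent)
 *		btrfs_set_work_high_prio(work);
 *	btrfs_queue_worker(workers, work);
 */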

struct __btrfs_workqueue_struct {
	struct workqueue_struct *normal_wq;
	/* List head pointing to ordered work list */
	struct list_head ordered_list;

	/* Spinlock for ordered_list */
	spinlock_t list_lock;

	/* Thresholding related variables */
	atomic_t pending;
	int max_active;
	int current_max;
	int thresh;
	unsigned int count;
	spinlock_t thres_lock;
};

struct btrfs_workqueue_struct {
	struct __btrfs_workqueue_struct *normal;
	struct __btrfs_workqueue_struct *high;
};

static inline struct __btrfs_workqueue_struct
*__btrfs_alloc_workqueue(char *name, int flags, int max_active, int thresh)
{
	struct __btrfs_workqueue_struct *ret = kzalloc(sizeof(*ret), GFP_NOFS);

	if (unlikely(!ret))
		return NULL;

	ret->max_active = max_active;
	atomic_set(&ret->pending, 0);
	if (thresh == 0)
		thresh = DFT_THRESHOLD;
	/* For a low threshold, disabling thresholding is a better choice */
	if (thresh < DFT_THRESHOLD) {
		ret->current_max = max_active;
		ret->thresh = NO_THRESHOLD;
	} else {
		ret->current_max = 1;
		ret->thresh = thresh;
	}

	if (flags & WQ_HIGHPRI)
		ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
						 ret->max_active,
						 "btrfs", name);
	else
		ret->normal_wq = alloc_workqueue("%s-%s", flags,
						 ret->max_active, "btrfs",
						 name);
	if (unlikely(!ret->normal_wq)) {
		kfree(ret);
		return NULL;
	}

	INIT_LIST_HEAD(&ret->ordered_list);
	spin_lock_init(&ret->list_lock);
	spin_lock_init(&ret->thres_lock);
	return ret;
}

static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue_struct *wq);

struct btrfs_workqueue_struct *btrfs_alloc_workqueue(char *name,
						     int flags,
						     int max_active,
						     int thresh)
{
	struct btrfs_workqueue_struct *ret = kzalloc(sizeof(*ret), GFP_NOFS);

	if (unlikely(!ret))
		return NULL;

	ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
					      max_active, thresh);
	if (unlikely(!ret->normal)) {
		kfree(ret);
		return NULL;
	}

	if (flags & WQ_HIGHPRI) {
		ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
						    thresh);
		if (unlikely(!ret->high)) {
			__btrfs_destroy_workqueue(ret->normal);
			kfree(ret);
			return NULL;
		}
	}
	return ret;
}
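
/*
 * Illustrative only: allocating a new-style workqueue with the function
 * above.  The name "example" and the numbers are hypothetical; passing
 * WQ_HIGHPRI creates the extra high priority queue as well, and a
 * thresh of 0 falls back to DFT_THRESHOLD.
 *
 *	struct btrfs_workqueue_struct *wq;
 *
 *	wq = btrfs_alloc_workqueue("example", WQ_UNBOUND | WQ_HIGHPRI, 8, 0);
 *	if (!wq)
 *		return -ENOMEM;
 */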

/*
 * Hook for the thresholding mechanism, called from btrfs_queue_work.
 * This hook WILL be called in IRQ handler context,
 * so workqueue_set_max_active MUST NOT be called from it.
 */
static inline void thresh_queue_hook(struct __btrfs_workqueue_struct *wq)
{
	if (wq->thresh == NO_THRESHOLD)
		return;
	atomic_inc(&wq->pending);
}

/*
 * Hook for the thresholding mechanism, called before executing a work item.
 * This hook runs in kthread context,
 * so workqueue_set_max_active may be called here.
 */
static inline void thresh_exec_hook(struct __btrfs_workqueue_struct *wq)
{
	int new_max_active;
	long pending;
	int need_change = 0;

	if (wq->thresh == NO_THRESHOLD)
		return;

	atomic_dec(&wq->pending);
	spin_lock(&wq->thres_lock);
	/*
	 * Use wq->count to limit the calling frequency of
	 * workqueue_set_max_active.
	 */
	wq->count++;
	wq->count %= (wq->thresh / 4);
	if (!wq->count)
		goto out;
	new_max_active = wq->current_max;

	/*
	 * pending may change under us, but that's OK: we don't need a
	 * perfectly accurate value to calculate new_max_active.
	 */
	pending = atomic_read(&wq->pending);
	if (pending > wq->thresh)
		new_max_active++;
	if (pending < wq->thresh / 2)
		new_max_active--;
	new_max_active = clamp_val(new_max_active, 1, wq->max_active);
	if (new_max_active != wq->current_max) {
		need_change = 1;
		wq->current_max = new_max_active;
	}
out:
	spin_unlock(&wq->thres_lock);

	if (need_change)
		workqueue_set_max_active(wq->normal_wq, wq->current_max);
}
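
/*
 * A worked example of the adjustment above, assuming thresh = 32 and
 * max_active = 8: with more than 32 items pending, current_max steps up
 * by one per adjustment toward 8; once pending drops below 16
 * (thresh / 2) it steps back down toward 1, and clamp_val() keeps the
 * value inside [1, max_active] in both directions.
 */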

static void run_ordered_work(struct __btrfs_workqueue_struct *wq)
{
	struct list_head *list = &wq->ordered_list;
	struct btrfs_work_struct *work;
	spinlock_t *lock = &wq->list_lock;
	unsigned long flags;

	while (1) {
		spin_lock_irqsave(lock, flags);
		if (list_empty(list))
			break;
		work = list_entry(list->next, struct btrfs_work_struct,
				  ordered_list);
		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

		/*
		 * we are going to call the ordered done function, but
		 * we leave the work item on the list as a barrier so
		 * that later work items that are done don't have their
		 * functions called before this one returns
		 */
		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
			break;
		spin_unlock_irqrestore(lock, flags);
		work->ordered_func(work);

		/* now take the lock again and drop our item from the list */
		spin_lock_irqsave(lock, flags);
		list_del(&work->ordered_list);
		spin_unlock_irqrestore(lock, flags);

		/*
		 * we don't want to call the ordered free functions
		 * with the lock held though
		 */
		work->ordered_free(work);
	}
	spin_unlock_irqrestore(lock, flags);
}

static void normal_work_helper(struct work_struct *arg)
{
	struct btrfs_work_struct *work;
	struct __btrfs_workqueue_struct *wq;
	int need_order = 0;

	work = container_of(arg, struct btrfs_work_struct, normal_work);
	/*
	 * We should not touch things inside work in the following cases:
	 * 1) after work->func() if it has no ordered_free
	 *    Since the struct is freed in work->func().
	 * 2) after setting WORK_DONE_BIT
	 *    The work may be freed in other threads almost instantly.
	 * So we save the needed things here.
	 */
	if (work->ordered_func)
		need_order = 1;
	wq = work->wq;

	thresh_exec_hook(wq);
	work->func(work);
	if (need_order) {
		set_bit(WORK_DONE_BIT, &work->flags);
		run_ordered_work(wq);
	}
}

void btrfs_init_work(struct btrfs_work_struct *work,
		     void (*func)(struct btrfs_work_struct *),
		     void (*ordered_func)(struct btrfs_work_struct *),
		     void (*ordered_free)(struct btrfs_work_struct *))
{
	work->func = func;
	work->ordered_func = ordered_func;
	work->ordered_free = ordered_free;
	INIT_WORK(&work->normal_work, normal_work_helper);
	INIT_LIST_HEAD(&work->ordered_list);
	work->flags = 0;
}

static inline void __btrfs_queue_work(struct __btrfs_workqueue_struct *wq,
				      struct btrfs_work_struct *work)
{
	unsigned long flags;

	work->wq = wq;
	thresh_queue_hook(wq);
	if (work->ordered_func) {
		spin_lock_irqsave(&wq->list_lock, flags);
		list_add_tail(&work->ordered_list, &wq->ordered_list);
		spin_unlock_irqrestore(&wq->list_lock, flags);
	}
	queue_work(wq->normal_wq, &work->normal_work);
}

void btrfs_queue_work(struct btrfs_workqueue_struct *wq,
		      struct btrfs_work_struct *work)
{
	struct __btrfs_workqueue_struct *dest_wq;

	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
		dest_wq = wq->high;
	else
		dest_wq = wq->normal;
	__btrfs_queue_work(dest_wq, work);
}
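
/*
 * Illustrative only: the intended call sequence for the new-style API,
 * with a hypothetical example_fn() and no ordering callbacks.  For an
 * ordered queue, ordered done/free callbacks would be passed to
 * btrfs_init_work() instead of the NULLs.
 *
 *	struct btrfs_work_struct work;
 *
 *	btrfs_init_work(&work, example_fn, NULL, NULL);
 *	btrfs_queue_work(wq, &work);
 *	... wait for all queued work to finish, then:
 *	btrfs_destroy_workqueue(wq);
 */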

static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue_struct *wq)
{
	destroy_workqueue(wq->normal_wq);
	kfree(wq);
}

void btrfs_destroy_workqueue(struct btrfs_workqueue_struct *wq)
{
	if (!wq)
		return;
	if (wq->high)
		__btrfs_destroy_workqueue(wq->high);
	__btrfs_destroy_workqueue(wq->normal);
}

void btrfs_workqueue_set_max(struct btrfs_workqueue_struct *wq, int max)
{
	wq->normal->max_active = max;
	if (wq->high)
		wq->high->max_active = max;
}

void btrfs_set_work_high_priority(struct btrfs_work_struct *work)
{
	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}