18b712842SChris Mason /* 28b712842SChris Mason * Copyright (C) 2007 Oracle. All rights reserved. 38b712842SChris Mason * 48b712842SChris Mason * This program is free software; you can redistribute it and/or 58b712842SChris Mason * modify it under the terms of the GNU General Public 68b712842SChris Mason * License v2 as published by the Free Software Foundation. 78b712842SChris Mason * 88b712842SChris Mason * This program is distributed in the hope that it will be useful, 98b712842SChris Mason * but WITHOUT ANY WARRANTY; without even the implied warranty of 108b712842SChris Mason * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 118b712842SChris Mason * General Public License for more details. 128b712842SChris Mason * 138b712842SChris Mason * You should have received a copy of the GNU General Public 148b712842SChris Mason * License along with this program; if not, write to the 158b712842SChris Mason * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 168b712842SChris Mason * Boston, MA 021110-1307, USA. 178b712842SChris Mason */ 188b712842SChris Mason 19d05e5a4dSChris Mason #include <linux/version.h> 208b712842SChris Mason #include <linux/kthread.h> 218b712842SChris Mason #include <linux/list.h> 228b712842SChris Mason #include <linux/spinlock.h> 238b712842SChris Mason # include <linux/freezer.h> 248b712842SChris Mason #include "async-thread.h" 258b712842SChris Mason 264a69a410SChris Mason #define WORK_QUEUED_BIT 0 274a69a410SChris Mason #define WORK_DONE_BIT 1 284a69a410SChris Mason #define WORK_ORDER_DONE_BIT 2 294a69a410SChris Mason 308b712842SChris Mason /* 318b712842SChris Mason * container for the kthread task pointer and the list of pending work 328b712842SChris Mason * One of these is allocated per thread. 338b712842SChris Mason */ 348b712842SChris Mason struct btrfs_worker_thread { 3535d8ba66SChris Mason /* pool we belong to */ 3635d8ba66SChris Mason struct btrfs_workers *workers; 3735d8ba66SChris Mason 388b712842SChris Mason /* list of struct btrfs_work that are waiting for service */ 398b712842SChris Mason struct list_head pending; 408b712842SChris Mason 418b712842SChris Mason /* list of worker threads from struct btrfs_workers */ 428b712842SChris Mason struct list_head worker_list; 438b712842SChris Mason 448b712842SChris Mason /* kthread */ 458b712842SChris Mason struct task_struct *task; 468b712842SChris Mason 478b712842SChris Mason /* number of things on the pending list */ 488b712842SChris Mason atomic_t num_pending; 4953863232SChris Mason 504854ddd0SChris Mason unsigned long sequence; 518b712842SChris Mason 528b712842SChris Mason /* protects the pending list. */ 538b712842SChris Mason spinlock_t lock; 548b712842SChris Mason 558b712842SChris Mason /* set to non-zero when this thread is already awake and kicking */ 568b712842SChris Mason int working; 5735d8ba66SChris Mason 5835d8ba66SChris Mason /* are we currently idle */ 5935d8ba66SChris Mason int idle; 608b712842SChris Mason }; 618b712842SChris Mason 628b712842SChris Mason /* 6335d8ba66SChris Mason * helper function to move a thread onto the idle list after it 6435d8ba66SChris Mason * has finished some requests. 6535d8ba66SChris Mason */ 6635d8ba66SChris Mason static void check_idle_worker(struct btrfs_worker_thread *worker) 6735d8ba66SChris Mason { 6835d8ba66SChris Mason if (!worker->idle && atomic_read(&worker->num_pending) < 6935d8ba66SChris Mason worker->workers->idle_thresh / 2) { 7035d8ba66SChris Mason unsigned long flags; 7135d8ba66SChris Mason spin_lock_irqsave(&worker->workers->lock, flags); 7235d8ba66SChris Mason worker->idle = 1; 7335d8ba66SChris Mason list_move(&worker->worker_list, &worker->workers->idle_list); 7435d8ba66SChris Mason spin_unlock_irqrestore(&worker->workers->lock, flags); 7535d8ba66SChris Mason } 7635d8ba66SChris Mason } 7735d8ba66SChris Mason 7835d8ba66SChris Mason /* 7935d8ba66SChris Mason * helper function to move a thread off the idle list after new 8035d8ba66SChris Mason * pending work is added. 8135d8ba66SChris Mason */ 8235d8ba66SChris Mason static void check_busy_worker(struct btrfs_worker_thread *worker) 8335d8ba66SChris Mason { 8435d8ba66SChris Mason if (worker->idle && atomic_read(&worker->num_pending) >= 8535d8ba66SChris Mason worker->workers->idle_thresh) { 8635d8ba66SChris Mason unsigned long flags; 8735d8ba66SChris Mason spin_lock_irqsave(&worker->workers->lock, flags); 8835d8ba66SChris Mason worker->idle = 0; 8935d8ba66SChris Mason list_move_tail(&worker->worker_list, 9035d8ba66SChris Mason &worker->workers->worker_list); 9135d8ba66SChris Mason spin_unlock_irqrestore(&worker->workers->lock, flags); 9235d8ba66SChris Mason } 9335d8ba66SChris Mason } 9435d8ba66SChris Mason 954a69a410SChris Mason static noinline int run_ordered_completions(struct btrfs_workers *workers, 964a69a410SChris Mason struct btrfs_work *work) 974a69a410SChris Mason { 984a69a410SChris Mason unsigned long flags; 994a69a410SChris Mason 1004a69a410SChris Mason if (!workers->ordered) 1014a69a410SChris Mason return 0; 1024a69a410SChris Mason 1034a69a410SChris Mason set_bit(WORK_DONE_BIT, &work->flags); 1044a69a410SChris Mason 1054a69a410SChris Mason spin_lock_irqsave(&workers->lock, flags); 1064a69a410SChris Mason 1074a69a410SChris Mason while(!list_empty(&workers->order_list)) { 1084a69a410SChris Mason work = list_entry(workers->order_list.next, 1094a69a410SChris Mason struct btrfs_work, order_list); 1104a69a410SChris Mason 1114a69a410SChris Mason if (!test_bit(WORK_DONE_BIT, &work->flags)) 1124a69a410SChris Mason break; 1134a69a410SChris Mason 1144a69a410SChris Mason /* we are going to call the ordered done function, but 1154a69a410SChris Mason * we leave the work item on the list as a barrier so 1164a69a410SChris Mason * that later work items that are done don't have their 1174a69a410SChris Mason * functions called before this one returns 1184a69a410SChris Mason */ 1194a69a410SChris Mason if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) 1204a69a410SChris Mason break; 1214a69a410SChris Mason 1224a69a410SChris Mason spin_unlock_irqrestore(&workers->lock, flags); 1234a69a410SChris Mason 1244a69a410SChris Mason work->ordered_func(work); 1254a69a410SChris Mason 1264a69a410SChris Mason /* now take the lock again and call the freeing code */ 1274a69a410SChris Mason spin_lock_irqsave(&workers->lock, flags); 1284a69a410SChris Mason list_del(&work->order_list); 1294a69a410SChris Mason work->ordered_free(work); 1304a69a410SChris Mason } 1314a69a410SChris Mason 1324a69a410SChris Mason spin_unlock_irqrestore(&workers->lock, flags); 1334a69a410SChris Mason return 0; 1344a69a410SChris Mason } 1354a69a410SChris Mason 13635d8ba66SChris Mason /* 1378b712842SChris Mason * main loop for servicing work items 1388b712842SChris Mason */ 1398b712842SChris Mason static int worker_loop(void *arg) 1408b712842SChris Mason { 1418b712842SChris Mason struct btrfs_worker_thread *worker = arg; 1428b712842SChris Mason struct list_head *cur; 1438b712842SChris Mason struct btrfs_work *work; 1448b712842SChris Mason do { 1458b712842SChris Mason spin_lock_irq(&worker->lock); 1468b712842SChris Mason while(!list_empty(&worker->pending)) { 1478b712842SChris Mason cur = worker->pending.next; 1488b712842SChris Mason work = list_entry(cur, struct btrfs_work, list); 1498b712842SChris Mason list_del(&work->list); 1504a69a410SChris Mason clear_bit(WORK_QUEUED_BIT, &work->flags); 1518b712842SChris Mason 1528b712842SChris Mason work->worker = worker; 1538b712842SChris Mason spin_unlock_irq(&worker->lock); 1548b712842SChris Mason 1558b712842SChris Mason work->func(work); 1568b712842SChris Mason 1578b712842SChris Mason atomic_dec(&worker->num_pending); 1584a69a410SChris Mason /* 1594a69a410SChris Mason * unless this is an ordered work queue, 1604a69a410SChris Mason * 'work' was probably freed by func above. 1614a69a410SChris Mason */ 1624a69a410SChris Mason run_ordered_completions(worker->workers, work); 1634a69a410SChris Mason 1648b712842SChris Mason spin_lock_irq(&worker->lock); 16535d8ba66SChris Mason check_idle_worker(worker); 1664a69a410SChris Mason 1678b712842SChris Mason } 1688b712842SChris Mason worker->working = 0; 1698b712842SChris Mason if (freezing(current)) { 1708b712842SChris Mason refrigerator(); 1718b712842SChris Mason } else { 1728b712842SChris Mason set_current_state(TASK_INTERRUPTIBLE); 1738b712842SChris Mason spin_unlock_irq(&worker->lock); 1748b712842SChris Mason schedule(); 1758b712842SChris Mason __set_current_state(TASK_RUNNING); 1768b712842SChris Mason } 1778b712842SChris Mason } while (!kthread_should_stop()); 1788b712842SChris Mason return 0; 1798b712842SChris Mason } 1808b712842SChris Mason 1818b712842SChris Mason /* 1828b712842SChris Mason * this will wait for all the worker threads to shutdown 1838b712842SChris Mason */ 1848b712842SChris Mason int btrfs_stop_workers(struct btrfs_workers *workers) 1858b712842SChris Mason { 1868b712842SChris Mason struct list_head *cur; 1878b712842SChris Mason struct btrfs_worker_thread *worker; 1888b712842SChris Mason 18935d8ba66SChris Mason list_splice_init(&workers->idle_list, &workers->worker_list); 1908b712842SChris Mason while(!list_empty(&workers->worker_list)) { 1918b712842SChris Mason cur = workers->worker_list.next; 1928b712842SChris Mason worker = list_entry(cur, struct btrfs_worker_thread, 1938b712842SChris Mason worker_list); 1948b712842SChris Mason kthread_stop(worker->task); 1958b712842SChris Mason list_del(&worker->worker_list); 1968b712842SChris Mason kfree(worker); 1978b712842SChris Mason } 1988b712842SChris Mason return 0; 1998b712842SChris Mason } 2008b712842SChris Mason 2018b712842SChris Mason /* 2028b712842SChris Mason * simple init on struct btrfs_workers 2038b712842SChris Mason */ 2045443be45SChris Mason void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) 2058b712842SChris Mason { 2068b712842SChris Mason workers->num_workers = 0; 2078b712842SChris Mason INIT_LIST_HEAD(&workers->worker_list); 20835d8ba66SChris Mason INIT_LIST_HEAD(&workers->idle_list); 2094a69a410SChris Mason INIT_LIST_HEAD(&workers->order_list); 2108b712842SChris Mason spin_lock_init(&workers->lock); 2118b712842SChris Mason workers->max_workers = max; 21261b49440SChris Mason workers->idle_thresh = 32; 2135443be45SChris Mason workers->name = name; 2144a69a410SChris Mason workers->ordered = 0; 2158b712842SChris Mason } 2168b712842SChris Mason 2178b712842SChris Mason /* 2188b712842SChris Mason * starts new worker threads. This does not enforce the max worker 2198b712842SChris Mason * count in case you need to temporarily go past it. 2208b712842SChris Mason */ 2218b712842SChris Mason int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) 2228b712842SChris Mason { 2238b712842SChris Mason struct btrfs_worker_thread *worker; 2248b712842SChris Mason int ret = 0; 2258b712842SChris Mason int i; 2268b712842SChris Mason 2278b712842SChris Mason for (i = 0; i < num_workers; i++) { 2288b712842SChris Mason worker = kzalloc(sizeof(*worker), GFP_NOFS); 2298b712842SChris Mason if (!worker) { 2308b712842SChris Mason ret = -ENOMEM; 2318b712842SChris Mason goto fail; 2328b712842SChris Mason } 2338b712842SChris Mason 2348b712842SChris Mason INIT_LIST_HEAD(&worker->pending); 2358b712842SChris Mason INIT_LIST_HEAD(&worker->worker_list); 2368b712842SChris Mason spin_lock_init(&worker->lock); 2378b712842SChris Mason atomic_set(&worker->num_pending, 0); 2385443be45SChris Mason worker->task = kthread_run(worker_loop, worker, 2395443be45SChris Mason "btrfs-%s-%d", workers->name, 2405443be45SChris Mason workers->num_workers + i); 24135d8ba66SChris Mason worker->workers = workers; 2428b712842SChris Mason if (IS_ERR(worker->task)) { 2433bf10418SLi Zefan kfree(worker); 2448b712842SChris Mason ret = PTR_ERR(worker->task); 2458b712842SChris Mason goto fail; 2468b712842SChris Mason } 2478b712842SChris Mason 2488b712842SChris Mason spin_lock_irq(&workers->lock); 24935d8ba66SChris Mason list_add_tail(&worker->worker_list, &workers->idle_list); 2504854ddd0SChris Mason worker->idle = 1; 2518b712842SChris Mason workers->num_workers++; 2528b712842SChris Mason spin_unlock_irq(&workers->lock); 2538b712842SChris Mason } 2548b712842SChris Mason return 0; 2558b712842SChris Mason fail: 2568b712842SChris Mason btrfs_stop_workers(workers); 2578b712842SChris Mason return ret; 2588b712842SChris Mason } 2598b712842SChris Mason 2608b712842SChris Mason /* 2618b712842SChris Mason * run through the list and find a worker thread that doesn't have a lot 2628b712842SChris Mason * to do right now. This can return null if we aren't yet at the thread 2638b712842SChris Mason * count limit and all of the threads are busy. 2648b712842SChris Mason */ 2658b712842SChris Mason static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) 2668b712842SChris Mason { 2678b712842SChris Mason struct btrfs_worker_thread *worker; 2688b712842SChris Mason struct list_head *next; 2698b712842SChris Mason int enforce_min = workers->num_workers < workers->max_workers; 2708b712842SChris Mason 2718b712842SChris Mason /* 27235d8ba66SChris Mason * if we find an idle thread, don't move it to the end of the 27335d8ba66SChris Mason * idle list. This improves the chance that the next submission 27435d8ba66SChris Mason * will reuse the same thread, and maybe catch it while it is still 27535d8ba66SChris Mason * working 2768b712842SChris Mason */ 27735d8ba66SChris Mason if (!list_empty(&workers->idle_list)) { 27835d8ba66SChris Mason next = workers->idle_list.next; 2798b712842SChris Mason worker = list_entry(next, struct btrfs_worker_thread, 2808b712842SChris Mason worker_list); 28135d8ba66SChris Mason return worker; 2828b712842SChris Mason } 28335d8ba66SChris Mason if (enforce_min || list_empty(&workers->worker_list)) 2848b712842SChris Mason return NULL; 28535d8ba66SChris Mason 28635d8ba66SChris Mason /* 28735d8ba66SChris Mason * if we pick a busy task, move the task to the end of the list. 288d352ac68SChris Mason * hopefully this will keep things somewhat evenly balanced. 289d352ac68SChris Mason * Do the move in batches based on the sequence number. This groups 290d352ac68SChris Mason * requests submitted at roughly the same time onto the same worker. 29135d8ba66SChris Mason */ 29235d8ba66SChris Mason next = workers->worker_list.next; 29335d8ba66SChris Mason worker = list_entry(next, struct btrfs_worker_thread, worker_list); 2944854ddd0SChris Mason atomic_inc(&worker->num_pending); 2954854ddd0SChris Mason worker->sequence++; 296d352ac68SChris Mason 29753863232SChris Mason if (worker->sequence % workers->idle_thresh == 0) 29835d8ba66SChris Mason list_move_tail(next, &workers->worker_list); 2998b712842SChris Mason return worker; 3008b712842SChris Mason } 3018b712842SChris Mason 302d352ac68SChris Mason /* 303d352ac68SChris Mason * selects a worker thread to take the next job. This will either find 304d352ac68SChris Mason * an idle worker, start a new worker up to the max count, or just return 305d352ac68SChris Mason * one of the existing busy workers. 306d352ac68SChris Mason */ 3078b712842SChris Mason static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) 3088b712842SChris Mason { 3098b712842SChris Mason struct btrfs_worker_thread *worker; 3108b712842SChris Mason unsigned long flags; 3118b712842SChris Mason 3128b712842SChris Mason again: 3138b712842SChris Mason spin_lock_irqsave(&workers->lock, flags); 3148b712842SChris Mason worker = next_worker(workers); 3158b712842SChris Mason spin_unlock_irqrestore(&workers->lock, flags); 3168b712842SChris Mason 3178b712842SChris Mason if (!worker) { 3188b712842SChris Mason spin_lock_irqsave(&workers->lock, flags); 3198b712842SChris Mason if (workers->num_workers >= workers->max_workers) { 32035d8ba66SChris Mason struct list_head *fallback = NULL; 3218b712842SChris Mason /* 3228b712842SChris Mason * we have failed to find any workers, just 3238b712842SChris Mason * return the force one 3248b712842SChris Mason */ 32535d8ba66SChris Mason if (!list_empty(&workers->worker_list)) 32635d8ba66SChris Mason fallback = workers->worker_list.next; 32735d8ba66SChris Mason if (!list_empty(&workers->idle_list)) 32835d8ba66SChris Mason fallback = workers->idle_list.next; 32935d8ba66SChris Mason BUG_ON(!fallback); 33035d8ba66SChris Mason worker = list_entry(fallback, 3318b712842SChris Mason struct btrfs_worker_thread, worker_list); 3328b712842SChris Mason spin_unlock_irqrestore(&workers->lock, flags); 3338b712842SChris Mason } else { 3348b712842SChris Mason spin_unlock_irqrestore(&workers->lock, flags); 3358b712842SChris Mason /* we're below the limit, start another worker */ 3368b712842SChris Mason btrfs_start_workers(workers, 1); 3378b712842SChris Mason goto again; 3388b712842SChris Mason } 3398b712842SChris Mason } 3408b712842SChris Mason return worker; 3418b712842SChris Mason } 3428b712842SChris Mason 3438b712842SChris Mason /* 3448b712842SChris Mason * btrfs_requeue_work just puts the work item back on the tail of the list 3458b712842SChris Mason * it was taken from. It is intended for use with long running work functions 3468b712842SChris Mason * that make some progress and want to give the cpu up for others. 3478b712842SChris Mason */ 3488b712842SChris Mason int btrfs_requeue_work(struct btrfs_work *work) 3498b712842SChris Mason { 3508b712842SChris Mason struct btrfs_worker_thread *worker = work->worker; 3518b712842SChris Mason unsigned long flags; 3528b712842SChris Mason 3534a69a410SChris Mason if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) 3548b712842SChris Mason goto out; 3558b712842SChris Mason 3568b712842SChris Mason spin_lock_irqsave(&worker->lock, flags); 3578b712842SChris Mason atomic_inc(&worker->num_pending); 3588b712842SChris Mason list_add_tail(&work->list, &worker->pending); 35975ccf47dSChris Mason 36075ccf47dSChris Mason /* by definition we're busy, take ourselves off the idle 36175ccf47dSChris Mason * list 36275ccf47dSChris Mason */ 36375ccf47dSChris Mason if (worker->idle) { 36475ccf47dSChris Mason spin_lock_irqsave(&worker->workers->lock, flags); 36575ccf47dSChris Mason worker->idle = 0; 36675ccf47dSChris Mason list_move_tail(&worker->worker_list, 36775ccf47dSChris Mason &worker->workers->worker_list); 36875ccf47dSChris Mason spin_unlock_irqrestore(&worker->workers->lock, flags); 36975ccf47dSChris Mason } 37075ccf47dSChris Mason 3718b712842SChris Mason spin_unlock_irqrestore(&worker->lock, flags); 37275ccf47dSChris Mason 3738b712842SChris Mason out: 3748b712842SChris Mason return 0; 3758b712842SChris Mason } 3768b712842SChris Mason 3778b712842SChris Mason /* 3788b712842SChris Mason * places a struct btrfs_work into the pending queue of one of the kthreads 3798b712842SChris Mason */ 3808b712842SChris Mason int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) 3818b712842SChris Mason { 3828b712842SChris Mason struct btrfs_worker_thread *worker; 3838b712842SChris Mason unsigned long flags; 3848b712842SChris Mason int wake = 0; 3858b712842SChris Mason 3868b712842SChris Mason /* don't requeue something already on a list */ 3874a69a410SChris Mason if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) 3888b712842SChris Mason goto out; 3898b712842SChris Mason 3908b712842SChris Mason worker = find_worker(workers); 3914a69a410SChris Mason if (workers->ordered) { 3924a69a410SChris Mason spin_lock_irqsave(&workers->lock, flags); 3934a69a410SChris Mason list_add_tail(&work->order_list, &workers->order_list); 3944a69a410SChris Mason spin_unlock_irqrestore(&workers->lock, flags); 3954a69a410SChris Mason } else { 3964a69a410SChris Mason INIT_LIST_HEAD(&work->order_list); 3974a69a410SChris Mason } 3988b712842SChris Mason 3998b712842SChris Mason spin_lock_irqsave(&worker->lock, flags); 4008b712842SChris Mason atomic_inc(&worker->num_pending); 40135d8ba66SChris Mason check_busy_worker(worker); 4028b712842SChris Mason list_add_tail(&work->list, &worker->pending); 4038b712842SChris Mason 4048b712842SChris Mason /* 4058b712842SChris Mason * avoid calling into wake_up_process if this thread has already 4068b712842SChris Mason * been kicked 4078b712842SChris Mason */ 4088b712842SChris Mason if (!worker->working) 4098b712842SChris Mason wake = 1; 4108b712842SChris Mason worker->working = 1; 4118b712842SChris Mason 4128b712842SChris Mason spin_unlock_irqrestore(&worker->lock, flags); 4138b712842SChris Mason 4148b712842SChris Mason if (wake) 4158b712842SChris Mason wake_up_process(worker->task); 4168b712842SChris Mason out: 4178b712842SChris Mason return 0; 4188b712842SChris Mason } 419