/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 * Copyright (C) 2014 Fujitsu. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include "async-thread.h"

#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
#define WORK_HIGH_PRIO_BIT 3

/*
 * container for the kthread task pointer and the list of pending work
 * One of these is allocated per thread.
 */
struct btrfs_worker_thread {
	/* pool we belong to */
	struct btrfs_workers *workers;

	/* list of struct btrfs_work that are waiting for service */
	struct list_head pending;
	struct list_head prio_pending;

	/* list of worker threads from struct btrfs_workers */
	struct list_head worker_list;

	/* kthread */
	struct task_struct *task;

	/* number of things on the pending list */
	atomic_t num_pending;

	/* reference counter for this struct */
	atomic_t refs;

	unsigned long sequence;

	/* protects the pending list. */
	spinlock_t lock;

	/* set to non-zero when this thread is already awake and kicking */
	int working;

	/* are we currently idle */
	int idle;
};

static int __btrfs_start_workers(struct btrfs_workers *workers);

/*
 * btrfs_start_workers uses kthread_run, which can block waiting for memory
 * for a very long time.  It will actually throttle on page writeback,
 * and so it may not make progress until after our btrfs worker threads
 * process all of the pending work structs in their queue.
 *
 * This means we can't use btrfs_start_workers from inside a btrfs worker
 * thread that is used as part of cleaning dirty memory, which pretty much
 * involves all of the worker threads.
 *
 * Instead we have a helper queue that never has more than one thread,
 * where we schedule thread start operations.  This worker_start struct
 * is used to contain the work and hold a pointer to the queue that needs
 * another worker.
 */
struct worker_start {
	struct btrfs_work work;
	struct btrfs_workers *queue;
};

static void start_new_worker_func(struct btrfs_work *work)
{
	struct worker_start *start;
	start = container_of(work, struct worker_start, work);
	__btrfs_start_workers(start->queue);
	kfree(start);
}

/*
 * helper function to move a thread onto the idle list after it
 * has finished some requests.
 */
static void check_idle_worker(struct btrfs_worker_thread *worker)
{
	if (!worker->idle && atomic_read(&worker->num_pending) <
	    worker->workers->idle_thresh / 2) {
		unsigned long flags;
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 1;

		/* the list may be empty if the worker is just starting */
		if (!list_empty(&worker->worker_list) &&
		    !worker->workers->stopping) {
			list_move(&worker->worker_list,
				  &worker->workers->idle_list);
		}
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}
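
/*
 * Note the hysteresis between check_idle_worker() above and
 * check_busy_worker() below: with the default idle_thresh of 32 set in
 * btrfs_init_workers(), a worker is moved onto the idle list only once
 * num_pending drops below 16 (idle_thresh / 2), but is moved back to the
 * busy list only once num_pending reaches 32 again.  A queue length that
 * hovers around a single threshold therefore does not bounce the worker
 * between the two lists on every submission.
 */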

/*
 * helper function to move a thread off the idle list after new
 * pending work is added.
 */
static void check_busy_worker(struct btrfs_worker_thread *worker)
{
	if (worker->idle && atomic_read(&worker->num_pending) >=
	    worker->workers->idle_thresh) {
		unsigned long flags;
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 0;

		if (!list_empty(&worker->worker_list) &&
		    !worker->workers->stopping) {
			list_move_tail(&worker->worker_list,
				       &worker->workers->worker_list);
		}
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}

static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
	struct btrfs_workers *workers = worker->workers;
	struct worker_start *start;
	unsigned long flags;

	rmb();
	if (!workers->atomic_start_pending)
		return;

	start = kzalloc(sizeof(*start), GFP_NOFS);
	if (!start)
		return;

	start->work.func = start_new_worker_func;
	start->queue = workers;

	spin_lock_irqsave(&workers->lock, flags);
	if (!workers->atomic_start_pending)
		goto out;

	workers->atomic_start_pending = 0;
	if (workers->num_workers + workers->num_workers_starting >=
	    workers->max_workers)
		goto out;

	workers->num_workers_starting += 1;
	spin_unlock_irqrestore(&workers->lock, flags);
	btrfs_queue_worker(workers->atomic_worker_start, &start->work);
	return;

out:
	kfree(start);
	spin_unlock_irqrestore(&workers->lock, flags);
}

static noinline void run_ordered_completions(struct btrfs_workers *workers,
					     struct btrfs_work *work)
{
	if (!workers->ordered)
		return;

	set_bit(WORK_DONE_BIT, &work->flags);

	spin_lock(&workers->order_lock);

	while (1) {
		if (!list_empty(&workers->prio_order_list)) {
			work = list_entry(workers->prio_order_list.next,
					  struct btrfs_work, order_list);
		} else if (!list_empty(&workers->order_list)) {
			work = list_entry(workers->order_list.next,
					  struct btrfs_work, order_list);
		} else {
			break;
		}
		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

		/* we are going to call the ordered done function, but
		 * we leave the work item on the list as a barrier so
		 * that later work items that are done don't have their
		 * functions called before this one returns
		 */
		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
			break;

		spin_unlock(&workers->order_lock);

		work->ordered_func(work);

		/* now take the lock again and drop our item from the list */
		spin_lock(&workers->order_lock);
		list_del(&work->order_list);
		spin_unlock(&workers->order_lock);

		/*
		 * we don't want to call the ordered free functions
		 * with the lock held though
		 */
		work->ordered_free(work);
		spin_lock(&workers->order_lock);
	}

	spin_unlock(&workers->order_lock);
}

static void put_worker(struct btrfs_worker_thread *worker)
{
	if (atomic_dec_and_test(&worker->refs))
		kfree(worker);
}

static int try_worker_shutdown(struct btrfs_worker_thread *worker)
{
	int freeit = 0;

	spin_lock_irq(&worker->lock);
	spin_lock(&worker->workers->lock);
	if (worker->workers->num_workers > 1 &&
	    worker->idle &&
	    !worker->working &&
	    !list_empty(&worker->worker_list) &&
	    list_empty(&worker->prio_pending) &&
	    list_empty(&worker->pending) &&
	    atomic_read(&worker->num_pending) == 0) {
		freeit = 1;
		list_del_init(&worker->worker_list);
		worker->workers->num_workers--;
	}
	spin_unlock(&worker->workers->lock);
	spin_unlock_irq(&worker->lock);

	if (freeit)
		put_worker(worker);
	return freeit;
}

static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
					struct list_head *prio_head,
					struct list_head *head)
{
	struct btrfs_work *work = NULL;
	struct list_head *cur = NULL;

	if (!list_empty(prio_head)) {
		cur = prio_head->next;
		goto out;
	}

	smp_mb();
	if (!list_empty(&worker->prio_pending))
		goto refill;

	if (!list_empty(head)) {
		cur = head->next;
		goto out;
	}

refill:
	spin_lock_irq(&worker->lock);
	list_splice_tail_init(&worker->prio_pending, prio_head);
	list_splice_tail_init(&worker->pending, head);

	if (!list_empty(prio_head))
		cur = prio_head->next;
	else if (!list_empty(head))
		cur = head->next;
	spin_unlock_irq(&worker->lock);

	if (!cur)
		goto out_fail;

out:
	work = list_entry(cur, struct btrfs_work, list);

out_fail:
	return work;
}

/*
 * main loop for servicing work items
 */
static int worker_loop(void *arg)
{
	struct btrfs_worker_thread *worker = arg;
	struct list_head head;
	struct list_head prio_head;
	struct btrfs_work *work;

	INIT_LIST_HEAD(&head);
	INIT_LIST_HEAD(&prio_head);

	do {
again:
		while (1) {
			work = get_next_work(worker, &prio_head, &head);
			if (!work)
				break;

			list_del(&work->list);
			clear_bit(WORK_QUEUED_BIT, &work->flags);

			work->worker = worker;

			work->func(work);

			atomic_dec(&worker->num_pending);
			/*
			 * unless this is an ordered work queue,
			 * 'work' was probably freed by func above.
			 */
			run_ordered_completions(worker->workers, work);

			check_pending_worker_creates(worker);
			cond_resched();
		}

		spin_lock_irq(&worker->lock);
		check_idle_worker(worker);

		if (freezing(current)) {
			worker->working = 0;
			spin_unlock_irq(&worker->lock);
			try_to_freeze();
		} else {
			spin_unlock_irq(&worker->lock);
			if (!kthread_should_stop()) {
				cpu_relax();
				/*
				 * we've dropped the lock, did someone else
				 * jump in?
				 */
				smp_mb();
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending))
					continue;

				/*
				 * this short schedule allows more work to
				 * come in without the queue functions
				 * needing to go through wake_up_process()
				 *
				 * worker->working is still 1, so nobody
				 * is going to try and wake us up
				 */
				schedule_timeout(1);
				smp_mb();
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending))
					continue;

				if (kthread_should_stop())
					break;

				/* still no more work? sleep for real */
				spin_lock_irq(&worker->lock);
				set_current_state(TASK_INTERRUPTIBLE);
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending)) {
					spin_unlock_irq(&worker->lock);
					set_current_state(TASK_RUNNING);
					goto again;
				}

				/*
				 * this makes sure we get a wakeup when someone
				 * adds something new to the queue
				 */
				worker->working = 0;
				spin_unlock_irq(&worker->lock);

				if (!kthread_should_stop()) {
					schedule_timeout(HZ * 120);
					if (!worker->working &&
					    try_worker_shutdown(worker)) {
						return 0;
					}
				}
			}
			__set_current_state(TASK_RUNNING);
		}
	} while (!kthread_should_stop());
	return 0;
}

/*
 * this will wait for all the worker threads to shutdown
 */
void btrfs_stop_workers(struct btrfs_workers *workers)
{
	struct list_head *cur;
	struct btrfs_worker_thread *worker;
	int can_stop;

	spin_lock_irq(&workers->lock);
	workers->stopping = 1;
	list_splice_init(&workers->idle_list, &workers->worker_list);
	while (!list_empty(&workers->worker_list)) {
		cur = workers->worker_list.next;
		worker = list_entry(cur, struct btrfs_worker_thread,
				    worker_list);

		atomic_inc(&worker->refs);
		workers->num_workers -= 1;
		if (!list_empty(&worker->worker_list)) {
			list_del_init(&worker->worker_list);
			put_worker(worker);
			can_stop = 1;
		} else
			can_stop = 0;
		spin_unlock_irq(&workers->lock);
		if (can_stop)
			kthread_stop(worker->task);
		spin_lock_irq(&workers->lock);
		put_worker(worker);
	}
	spin_unlock_irq(&workers->lock);
}

/*
 * simple init on struct btrfs_workers
 */
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
			struct btrfs_workers *async_helper)
{
	workers->num_workers = 0;
	workers->num_workers_starting = 0;
	INIT_LIST_HEAD(&workers->worker_list);
	INIT_LIST_HEAD(&workers->idle_list);
	INIT_LIST_HEAD(&workers->order_list);
	INIT_LIST_HEAD(&workers->prio_order_list);
	spin_lock_init(&workers->lock);
	spin_lock_init(&workers->order_lock);
	workers->max_workers = max;
	workers->idle_thresh = 32;
	workers->name = name;
	workers->ordered = 0;
	workers->atomic_start_pending = 0;
	workers->atomic_worker_start = async_helper;
	workers->stopping = 0;
}
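
/*
 * Illustrative sketch of how a caller might drive this pool interface
 * (not code from the original file; my_workers, my_helper, my_work_fn and
 * the error handling are hypothetical):
 *
 *	struct btrfs_workers my_workers;
 *	struct btrfs_work *work;
 *	int ret;
 *
 *	btrfs_init_workers(&my_workers, "example", 4, &my_helper);
 *	ret = btrfs_start_workers(&my_workers);
 *	if (ret)
 *		return ret;
 *
 *	work = kzalloc(sizeof(*work), GFP_NOFS);
 *	work->func = my_work_fn;
 *	btrfs_queue_worker(&my_workers, work);
 *
 * Additional threads are started on demand by find_worker() (or deferred to
 * the helper queue), up to the max of 4 passed above, and
 * btrfs_stop_workers(&my_workers) tears the pool down again.
 */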

/*
 * starts new worker threads.  This does not enforce the max worker
 * count in case you need to temporarily go past it.
 */
static int __btrfs_start_workers(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	int ret = 0;

	worker = kzalloc(sizeof(*worker), GFP_NOFS);
	if (!worker) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&worker->pending);
	INIT_LIST_HEAD(&worker->prio_pending);
	INIT_LIST_HEAD(&worker->worker_list);
	spin_lock_init(&worker->lock);

	atomic_set(&worker->num_pending, 0);
	atomic_set(&worker->refs, 1);
	worker->workers = workers;
	worker->task = kthread_create(worker_loop, worker,
				      "btrfs-%s-%d", workers->name,
				      workers->num_workers + 1);
	if (IS_ERR(worker->task)) {
		ret = PTR_ERR(worker->task);
		goto fail;
	}

	spin_lock_irq(&workers->lock);
	if (workers->stopping) {
		spin_unlock_irq(&workers->lock);
		ret = -EINVAL;
		goto fail_kthread;
	}
	list_add_tail(&worker->worker_list, &workers->idle_list);
	worker->idle = 1;
	workers->num_workers++;
	workers->num_workers_starting--;
	WARN_ON(workers->num_workers_starting < 0);
	spin_unlock_irq(&workers->lock);

	wake_up_process(worker->task);
	return 0;

fail_kthread:
	kthread_stop(worker->task);
fail:
	kfree(worker);
	spin_lock_irq(&workers->lock);
	workers->num_workers_starting--;
	spin_unlock_irq(&workers->lock);
	return ret;
}

int btrfs_start_workers(struct btrfs_workers *workers)
{
	spin_lock_irq(&workers->lock);
	workers->num_workers_starting++;
	spin_unlock_irq(&workers->lock);
	return __btrfs_start_workers(workers);
}

/*
 * run through the list and find a worker thread that doesn't have a lot
 * to do right now.  This can return null if we aren't yet at the thread
 * count limit and all of the threads are busy.
 */
static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	struct list_head *next;
	int enforce_min;

	enforce_min = (workers->num_workers + workers->num_workers_starting) <
		workers->max_workers;

	/*
	 * if we find an idle thread, don't move it to the end of the
	 * idle list.  This improves the chance that the next submission
	 * will reuse the same thread, and maybe catch it while it is still
	 * working
	 */
	if (!list_empty(&workers->idle_list)) {
		next = workers->idle_list.next;
		worker = list_entry(next, struct btrfs_worker_thread,
				    worker_list);
		return worker;
	}
	if (enforce_min || list_empty(&workers->worker_list))
		return NULL;

	/*
	 * if we pick a busy task, move the task to the end of the list.
	 * hopefully this will keep things somewhat evenly balanced.
	 * Do the move in batches based on the sequence number.  This groups
	 * requests submitted at roughly the same time onto the same worker.
	 */
	next = workers->worker_list.next;
	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
	worker->sequence++;

	if (worker->sequence % workers->idle_thresh == 0)
		list_move_tail(next, &workers->worker_list);
	return worker;
}
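
/*
 * In concrete terms: while the idle list stays empty and the pool is at its
 * thread limit, the worker at the head of worker_list is handed up to
 * idle_thresh consecutive jobs (32 by default) as its sequence counter
 * ticks up, and only then is it rotated to the tail so that the next busy
 * worker takes the following batch.
 */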

/*
 * selects a worker thread to take the next job.  This will either find
 * an idle worker, start a new worker up to the max count, or just return
 * one of the existing busy workers.
 */
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	unsigned long flags;
	struct list_head *fallback;
	int ret;

	spin_lock_irqsave(&workers->lock, flags);
again:
	worker = next_worker(workers);

	if (!worker) {
		if (workers->num_workers + workers->num_workers_starting >=
		    workers->max_workers) {
			goto fallback;
		} else if (workers->atomic_worker_start) {
			workers->atomic_start_pending = 1;
			goto fallback;
		} else {
			workers->num_workers_starting++;
			spin_unlock_irqrestore(&workers->lock, flags);
			/* we're below the limit, start another worker */
			ret = __btrfs_start_workers(workers);
			spin_lock_irqsave(&workers->lock, flags);
			if (ret)
				goto fallback;
			goto again;
		}
	}
	goto found;

fallback:
	fallback = NULL;
	/*
	 * we have failed to find any workers, just
	 * return the first one we can find.
	 */
	if (!list_empty(&workers->worker_list))
		fallback = workers->worker_list.next;
	if (!list_empty(&workers->idle_list))
		fallback = workers->idle_list.next;
	BUG_ON(!fallback);
	worker = list_entry(fallback,
			    struct btrfs_worker_thread, worker_list);
found:
	/*
	 * this makes sure the worker doesn't exit before it is placed
	 * onto a busy/idle list
	 */
	atomic_inc(&worker->num_pending);
	spin_unlock_irqrestore(&workers->lock, flags);
	return worker;
}

/*
 * btrfs_requeue_work just puts the work item back on the tail of the list
 * it was taken from.  It is intended for use with long running work functions
 * that make some progress and want to give the cpu up for others.
 */
void btrfs_requeue_work(struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker = work->worker;
	unsigned long flags;
	int wake = 0;

	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		return;

	spin_lock_irqsave(&worker->lock, flags);
	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
		list_add_tail(&work->list, &worker->prio_pending);
	else
		list_add_tail(&work->list, &worker->pending);
	atomic_inc(&worker->num_pending);

	/* by definition we're busy, take ourselves off the idle
	 * list
	 */
	if (worker->idle) {
		spin_lock(&worker->workers->lock);
		worker->idle = 0;
		list_move_tail(&worker->worker_list,
			       &worker->workers->worker_list);
		spin_unlock(&worker->workers->lock);
	}
	if (!worker->working) {
		wake = 1;
		worker->working = 1;
	}

	if (wake)
		wake_up_process(worker->task);
	spin_unlock_irqrestore(&worker->lock, flags);
}

void btrfs_set_work_high_prio(struct btrfs_work *work)
{
	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}
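
/*
 * Illustrative sketch of submitting an ordered, high priority item through
 * btrfs_queue_worker() below (not code from the original file; the
 * ordered_workers pool and the csum_fn/csum_done/csum_free callbacks are
 * hypothetical).  On a pool with ->ordered set, csum_fn may run on any
 * worker thread, but csum_done and csum_free are called in queueing order,
 * with high priority items completed ahead of regular ones:
 *
 *	ordered_workers.ordered = 1;
 *
 *	work->func = csum_fn;
 *	work->ordered_func = csum_done;
 *	work->ordered_free = csum_free;
 *	btrfs_set_work_high_prio(work);
 *	btrfs_queue_worker(&ordered_workers, work);
 */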

/*
 * places a struct btrfs_work into the pending queue of one of the kthreads
 */
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker;
	unsigned long flags;
	int wake = 0;

	/* don't requeue something already on a list */
	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		return;

	worker = find_worker(workers);
	if (workers->ordered) {
		/*
		 * you're not allowed to do ordered queues from an
		 * interrupt handler
		 */
		spin_lock(&workers->order_lock);
		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
			list_add_tail(&work->order_list,
				      &workers->prio_order_list);
		} else {
			list_add_tail(&work->order_list, &workers->order_list);
		}
		spin_unlock(&workers->order_lock);
	} else {
		INIT_LIST_HEAD(&work->order_list);
	}

	spin_lock_irqsave(&worker->lock, flags);

	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
		list_add_tail(&work->list, &worker->prio_pending);
	else
		list_add_tail(&work->list, &worker->pending);
	check_busy_worker(worker);

	/*
	 * avoid calling into wake_up_process if this thread has already
	 * been kicked
	 */
	if (!worker->working)
		wake = 1;
	worker->working = 1;

	if (wake)
		wake_up_process(worker->task);
	spin_unlock_irqrestore(&worker->lock, flags);
}

struct btrfs_workqueue_struct {
	struct workqueue_struct *normal_wq;
	/* List head pointing to ordered work list */
	struct list_head ordered_list;

	/* Spinlock for ordered_list */
	spinlock_t list_lock;
};

struct btrfs_workqueue_struct *btrfs_alloc_workqueue(char *name,
						     int flags,
						     int max_active)
{
	struct btrfs_workqueue_struct *ret = kzalloc(sizeof(*ret), GFP_NOFS);

	if (unlikely(!ret))
		return NULL;

	ret->normal_wq = alloc_workqueue("%s-%s", flags, max_active,
					 "btrfs", name);
	if (unlikely(!ret->normal_wq)) {
		kfree(ret);
		return NULL;
	}

	INIT_LIST_HEAD(&ret->ordered_list);
	spin_lock_init(&ret->list_lock);
	return ret;
}

static void run_ordered_work(struct btrfs_workqueue_struct *wq)
{
	struct list_head *list = &wq->ordered_list;
	struct btrfs_work_struct *work;
	spinlock_t *lock = &wq->list_lock;
	unsigned long flags;

	while (1) {
		spin_lock_irqsave(lock, flags);
		if (list_empty(list))
			break;
		work = list_entry(list->next, struct btrfs_work_struct,
				  ordered_list);
		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

		/*
		 * we are going to call the ordered done function, but
		 * we leave the work item on the list as a barrier so
		 * that later work items that are done don't have their
		 * functions called before this one returns
		 */
		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
			break;
		spin_unlock_irqrestore(lock, flags);
		work->ordered_func(work);

		/* now take the lock again and drop our item from the list */
		spin_lock_irqsave(lock, flags);
		list_del(&work->ordered_list);
		spin_unlock_irqrestore(lock, flags);

		/*
		 * we don't want to call the ordered free functions
		 * with the lock held though
		 */
		work->ordered_free(work);
	}
	spin_unlock_irqrestore(lock, flags);
}

static void normal_work_helper(struct work_struct *arg)
{
	struct btrfs_work_struct *work;
	struct btrfs_workqueue_struct *wq;
	int need_order = 0;

	work = container_of(arg, struct btrfs_work_struct, normal_work);
	/*
	 * We should not touch things inside work in the following cases:
	 * 1) after work->func() if it has no ordered_free
	 *    Since the struct is freed in work->func().
	 * 2) after setting WORK_DONE_BIT
	 *    The work may be freed in other threads almost instantly.
	 * So we save the needed things here.
	 */
	if (work->ordered_func)
		need_order = 1;
	wq = work->wq;

	work->func(work);
	if (need_order) {
		set_bit(WORK_DONE_BIT, &work->flags);
		run_ordered_work(wq);
	}
}

void btrfs_init_work(struct btrfs_work_struct *work,
		     void (*func)(struct btrfs_work_struct *),
		     void (*ordered_func)(struct btrfs_work_struct *),
		     void (*ordered_free)(struct btrfs_work_struct *))
{
	work->func = func;
	work->ordered_func = ordered_func;
	work->ordered_free = ordered_free;
	INIT_WORK(&work->normal_work, normal_work_helper);
	INIT_LIST_HEAD(&work->ordered_list);
	work->flags = 0;
}

void btrfs_queue_work(struct btrfs_workqueue_struct *wq,
		      struct btrfs_work_struct *work)
{
	unsigned long flags;

	work->wq = wq;
	if (work->ordered_func) {
		spin_lock_irqsave(&wq->list_lock, flags);
		list_add_tail(&work->ordered_list, &wq->ordered_list);
		spin_unlock_irqrestore(&wq->list_lock, flags);
	}
	queue_work(wq->normal_wq, &work->normal_work);
}

void btrfs_destroy_workqueue(struct btrfs_workqueue_struct *wq)
{
	destroy_workqueue(wq->normal_wq);
	kfree(wq);
}

void btrfs_workqueue_set_max(struct btrfs_workqueue_struct *wq, int max)
{
	workqueue_set_max_active(wq->normal_wq, max);
}
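
/*
 * Illustrative sketch of the kernel-workqueue backed interface above (not
 * code from the original file; my_wq, my_func, my_done and my_free are
 * hypothetical names, and the flags/max_active values are only examples):
 *
 *	struct btrfs_workqueue_struct *my_wq;
 *	struct btrfs_work_struct *work;
 *
 *	my_wq = btrfs_alloc_workqueue("example", WQ_UNBOUND, 4);
 *	if (!my_wq)
 *		return -ENOMEM;
 *
 *	work = kzalloc(sizeof(*work), GFP_NOFS);
 *	btrfs_init_work(work, my_func, my_done, my_free);
 *	btrfs_queue_work(my_wq, work);
 *
 *	btrfs_workqueue_set_max(my_wq, 8);
 *	btrfs_destroy_workqueue(my_wq);
 *
 * Passing NULL for the two ordered callbacks is fine; normal_work_helper()
 * then skips the ordered list handling entirely.
 */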