/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 * Copyright (C) 2014 Fujitsu.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
188b712842SChris Mason */ 198b712842SChris Mason 208b712842SChris Mason #include <linux/kthread.h> 215a0e3ad6STejun Heo #include <linux/slab.h> 228b712842SChris Mason #include <linux/list.h> 238b712842SChris Mason #include <linux/spinlock.h> 248b712842SChris Mason #include <linux/freezer.h> 258b712842SChris Mason #include "async-thread.h" 2652483bc2SQu Wenruo #include "ctree.h" 278b712842SChris Mason 28a046e9c8SQu Wenruo #define WORK_DONE_BIT 0 29a046e9c8SQu Wenruo #define WORK_ORDER_DONE_BIT 1 30a046e9c8SQu Wenruo #define WORK_HIGH_PRIO_BIT 2 314a69a410SChris Mason 320bd9289cSQu Wenruo #define NO_THRESHOLD (-1) 330bd9289cSQu Wenruo #define DFT_THRESHOLD (32) 340bd9289cSQu Wenruo 35d458b054SQu Wenruo struct __btrfs_workqueue { 3608a9ff32SQu Wenruo struct workqueue_struct *normal_wq; 3708a9ff32SQu Wenruo /* List head pointing to ordered work list */ 3808a9ff32SQu Wenruo struct list_head ordered_list; 3908a9ff32SQu Wenruo 4008a9ff32SQu Wenruo /* Spinlock for ordered_list */ 4108a9ff32SQu Wenruo spinlock_t list_lock; 420bd9289cSQu Wenruo 430bd9289cSQu Wenruo /* Thresholding related variants */ 440bd9289cSQu Wenruo atomic_t pending; 45c6dd6ea5SQu Wenruo 46c6dd6ea5SQu Wenruo /* Up limit of concurrency workers */ 47c6dd6ea5SQu Wenruo int limit_active; 48c6dd6ea5SQu Wenruo 49c6dd6ea5SQu Wenruo /* Current number of concurrency workers */ 50c6dd6ea5SQu Wenruo int current_active; 51c6dd6ea5SQu Wenruo 52c6dd6ea5SQu Wenruo /* Threshold to change current_active */ 530bd9289cSQu Wenruo int thresh; 540bd9289cSQu Wenruo unsigned int count; 550bd9289cSQu Wenruo spinlock_t thres_lock; 5608a9ff32SQu Wenruo }; 5708a9ff32SQu Wenruo 58d458b054SQu Wenruo struct btrfs_workqueue { 59d458b054SQu Wenruo struct __btrfs_workqueue *normal; 60d458b054SQu Wenruo struct __btrfs_workqueue *high; 611ca08976SQu Wenruo }; 621ca08976SQu Wenruo 639e0af237SLiu Bo static void normal_work_helper(struct btrfs_work *work); 649e0af237SLiu Bo 659e0af237SLiu Bo #define BTRFS_WORK_HELPER(name) \ 669e0af237SLiu 
Bo void btrfs_##name(struct work_struct *arg) \ 679e0af237SLiu Bo { \ 689e0af237SLiu Bo struct btrfs_work *work = container_of(arg, struct btrfs_work, \ 699e0af237SLiu Bo normal_work); \ 709e0af237SLiu Bo normal_work_helper(work); \ 719e0af237SLiu Bo } 729e0af237SLiu Bo 739e0af237SLiu Bo BTRFS_WORK_HELPER(worker_helper); 749e0af237SLiu Bo BTRFS_WORK_HELPER(delalloc_helper); 759e0af237SLiu Bo BTRFS_WORK_HELPER(flush_delalloc_helper); 769e0af237SLiu Bo BTRFS_WORK_HELPER(cache_helper); 779e0af237SLiu Bo BTRFS_WORK_HELPER(submit_helper); 789e0af237SLiu Bo BTRFS_WORK_HELPER(fixup_helper); 799e0af237SLiu Bo BTRFS_WORK_HELPER(endio_helper); 809e0af237SLiu Bo BTRFS_WORK_HELPER(endio_meta_helper); 819e0af237SLiu Bo BTRFS_WORK_HELPER(endio_meta_write_helper); 829e0af237SLiu Bo BTRFS_WORK_HELPER(endio_raid56_helper); 838b110e39SMiao Xie BTRFS_WORK_HELPER(endio_repair_helper); 849e0af237SLiu Bo BTRFS_WORK_HELPER(rmw_helper); 859e0af237SLiu Bo BTRFS_WORK_HELPER(endio_write_helper); 869e0af237SLiu Bo BTRFS_WORK_HELPER(freespace_write_helper); 879e0af237SLiu Bo BTRFS_WORK_HELPER(delayed_meta_helper); 889e0af237SLiu Bo BTRFS_WORK_HELPER(readahead_helper); 899e0af237SLiu Bo BTRFS_WORK_HELPER(qgroup_rescan_helper); 909e0af237SLiu Bo BTRFS_WORK_HELPER(extent_refs_helper); 919e0af237SLiu Bo BTRFS_WORK_HELPER(scrub_helper); 929e0af237SLiu Bo BTRFS_WORK_HELPER(scrubwrc_helper); 939e0af237SLiu Bo BTRFS_WORK_HELPER(scrubnc_helper); 9420b2e302SZhao Lei BTRFS_WORK_HELPER(scrubparity_helper); 959e0af237SLiu Bo 969e0af237SLiu Bo static struct __btrfs_workqueue * 97c6dd6ea5SQu Wenruo __btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active, 98c3a46891SQu Wenruo int thresh) 9908a9ff32SQu Wenruo { 100d458b054SQu Wenruo struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); 10108a9ff32SQu Wenruo 1025d99a998SDavid Sterba if (!ret) 10308a9ff32SQu Wenruo return NULL; 10408a9ff32SQu Wenruo 105c6dd6ea5SQu Wenruo ret->limit_active = limit_active; 1060bd9289cSQu Wenruo 
atomic_set(&ret->pending, 0); 1070bd9289cSQu Wenruo if (thresh == 0) 1080bd9289cSQu Wenruo thresh = DFT_THRESHOLD; 1090bd9289cSQu Wenruo /* For low threshold, disabling threshold is a better choice */ 1100bd9289cSQu Wenruo if (thresh < DFT_THRESHOLD) { 111c6dd6ea5SQu Wenruo ret->current_active = limit_active; 1120bd9289cSQu Wenruo ret->thresh = NO_THRESHOLD; 1130bd9289cSQu Wenruo } else { 114c6dd6ea5SQu Wenruo /* 115c6dd6ea5SQu Wenruo * For threshold-able wq, let its concurrency grow on demand. 116c6dd6ea5SQu Wenruo * Use minimal max_active at alloc time to reduce resource 117c6dd6ea5SQu Wenruo * usage. 118c6dd6ea5SQu Wenruo */ 119c6dd6ea5SQu Wenruo ret->current_active = 1; 1200bd9289cSQu Wenruo ret->thresh = thresh; 1210bd9289cSQu Wenruo } 1220bd9289cSQu Wenruo 1231ca08976SQu Wenruo if (flags & WQ_HIGHPRI) 1241ca08976SQu Wenruo ret->normal_wq = alloc_workqueue("%s-%s-high", flags, 125c6dd6ea5SQu Wenruo ret->current_active, "btrfs", 126c6dd6ea5SQu Wenruo name); 1271ca08976SQu Wenruo else 1281ca08976SQu Wenruo ret->normal_wq = alloc_workqueue("%s-%s", flags, 129c6dd6ea5SQu Wenruo ret->current_active, "btrfs", 1300bd9289cSQu Wenruo name); 1315d99a998SDavid Sterba if (!ret->normal_wq) { 13208a9ff32SQu Wenruo kfree(ret); 13308a9ff32SQu Wenruo return NULL; 13408a9ff32SQu Wenruo } 13508a9ff32SQu Wenruo 13608a9ff32SQu Wenruo INIT_LIST_HEAD(&ret->ordered_list); 13708a9ff32SQu Wenruo spin_lock_init(&ret->list_lock); 1380bd9289cSQu Wenruo spin_lock_init(&ret->thres_lock); 139c3a46891SQu Wenruo trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI); 14008a9ff32SQu Wenruo return ret; 14108a9ff32SQu Wenruo } 14208a9ff32SQu Wenruo 1431ca08976SQu Wenruo static inline void 144d458b054SQu Wenruo __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); 1451ca08976SQu Wenruo 146c3a46891SQu Wenruo struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 1476f011058SDavid Sterba unsigned int flags, 148c6dd6ea5SQu Wenruo int limit_active, 1490bd9289cSQu Wenruo int thresh) 
1501ca08976SQu Wenruo { 151d458b054SQu Wenruo struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); 1521ca08976SQu Wenruo 1535d99a998SDavid Sterba if (!ret) 1541ca08976SQu Wenruo return NULL; 1551ca08976SQu Wenruo 1561ca08976SQu Wenruo ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI, 157c6dd6ea5SQu Wenruo limit_active, thresh); 1585d99a998SDavid Sterba if (!ret->normal) { 1591ca08976SQu Wenruo kfree(ret); 1601ca08976SQu Wenruo return NULL; 1611ca08976SQu Wenruo } 1621ca08976SQu Wenruo 1631ca08976SQu Wenruo if (flags & WQ_HIGHPRI) { 164c6dd6ea5SQu Wenruo ret->high = __btrfs_alloc_workqueue(name, flags, limit_active, 1650bd9289cSQu Wenruo thresh); 1665d99a998SDavid Sterba if (!ret->high) { 1671ca08976SQu Wenruo __btrfs_destroy_workqueue(ret->normal); 1681ca08976SQu Wenruo kfree(ret); 1691ca08976SQu Wenruo return NULL; 1701ca08976SQu Wenruo } 1711ca08976SQu Wenruo } 1721ca08976SQu Wenruo return ret; 1731ca08976SQu Wenruo } 1741ca08976SQu Wenruo 1750bd9289cSQu Wenruo /* 1760bd9289cSQu Wenruo * Hook for threshold which will be called in btrfs_queue_work. 1770bd9289cSQu Wenruo * This hook WILL be called in IRQ handler context, 1780bd9289cSQu Wenruo * so workqueue_set_max_active MUST NOT be called in this hook 1790bd9289cSQu Wenruo */ 180d458b054SQu Wenruo static inline void thresh_queue_hook(struct __btrfs_workqueue *wq) 1810bd9289cSQu Wenruo { 1820bd9289cSQu Wenruo if (wq->thresh == NO_THRESHOLD) 1830bd9289cSQu Wenruo return; 1840bd9289cSQu Wenruo atomic_inc(&wq->pending); 1850bd9289cSQu Wenruo } 1860bd9289cSQu Wenruo 1870bd9289cSQu Wenruo /* 1880bd9289cSQu Wenruo * Hook for threshold which will be called before executing the work, 1890bd9289cSQu Wenruo * This hook is called in kthread content. 1900bd9289cSQu Wenruo * So workqueue_set_max_active is called here. 
1910bd9289cSQu Wenruo */ 192d458b054SQu Wenruo static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) 1930bd9289cSQu Wenruo { 194c6dd6ea5SQu Wenruo int new_current_active; 1950bd9289cSQu Wenruo long pending; 1960bd9289cSQu Wenruo int need_change = 0; 1970bd9289cSQu Wenruo 1980bd9289cSQu Wenruo if (wq->thresh == NO_THRESHOLD) 1990bd9289cSQu Wenruo return; 2000bd9289cSQu Wenruo 2010bd9289cSQu Wenruo atomic_dec(&wq->pending); 2020bd9289cSQu Wenruo spin_lock(&wq->thres_lock); 2030bd9289cSQu Wenruo /* 2040bd9289cSQu Wenruo * Use wq->count to limit the calling frequency of 2050bd9289cSQu Wenruo * workqueue_set_max_active. 2060bd9289cSQu Wenruo */ 2070bd9289cSQu Wenruo wq->count++; 2080bd9289cSQu Wenruo wq->count %= (wq->thresh / 4); 2090bd9289cSQu Wenruo if (!wq->count) 2100bd9289cSQu Wenruo goto out; 211c6dd6ea5SQu Wenruo new_current_active = wq->current_active; 2120bd9289cSQu Wenruo 2130bd9289cSQu Wenruo /* 2140bd9289cSQu Wenruo * pending may be changed later, but it's OK since we really 2150bd9289cSQu Wenruo * don't need it so accurate to calculate new_max_active. 
2160bd9289cSQu Wenruo */ 2170bd9289cSQu Wenruo pending = atomic_read(&wq->pending); 2180bd9289cSQu Wenruo if (pending > wq->thresh) 219c6dd6ea5SQu Wenruo new_current_active++; 2200bd9289cSQu Wenruo if (pending < wq->thresh / 2) 221c6dd6ea5SQu Wenruo new_current_active--; 222c6dd6ea5SQu Wenruo new_current_active = clamp_val(new_current_active, 1, wq->limit_active); 223c6dd6ea5SQu Wenruo if (new_current_active != wq->current_active) { 2240bd9289cSQu Wenruo need_change = 1; 225c6dd6ea5SQu Wenruo wq->current_active = new_current_active; 2260bd9289cSQu Wenruo } 2270bd9289cSQu Wenruo out: 2280bd9289cSQu Wenruo spin_unlock(&wq->thres_lock); 2290bd9289cSQu Wenruo 2300bd9289cSQu Wenruo if (need_change) { 231c6dd6ea5SQu Wenruo workqueue_set_max_active(wq->normal_wq, wq->current_active); 2320bd9289cSQu Wenruo } 2330bd9289cSQu Wenruo } 2340bd9289cSQu Wenruo 235d458b054SQu Wenruo static void run_ordered_work(struct __btrfs_workqueue *wq) 23608a9ff32SQu Wenruo { 23708a9ff32SQu Wenruo struct list_head *list = &wq->ordered_list; 238d458b054SQu Wenruo struct btrfs_work *work; 23908a9ff32SQu Wenruo spinlock_t *lock = &wq->list_lock; 24008a9ff32SQu Wenruo unsigned long flags; 24108a9ff32SQu Wenruo 24208a9ff32SQu Wenruo while (1) { 24308a9ff32SQu Wenruo spin_lock_irqsave(lock, flags); 24408a9ff32SQu Wenruo if (list_empty(list)) 24508a9ff32SQu Wenruo break; 246d458b054SQu Wenruo work = list_entry(list->next, struct btrfs_work, 24708a9ff32SQu Wenruo ordered_list); 24808a9ff32SQu Wenruo if (!test_bit(WORK_DONE_BIT, &work->flags)) 24908a9ff32SQu Wenruo break; 25008a9ff32SQu Wenruo 25108a9ff32SQu Wenruo /* 25208a9ff32SQu Wenruo * we are going to call the ordered done function, but 25308a9ff32SQu Wenruo * we leave the work item on the list as a barrier so 25408a9ff32SQu Wenruo * that later work items that are done don't have their 25508a9ff32SQu Wenruo * functions called before this one returns 25608a9ff32SQu Wenruo */ 25708a9ff32SQu Wenruo if (test_and_set_bit(WORK_ORDER_DONE_BIT, 
&work->flags)) 25808a9ff32SQu Wenruo break; 25952483bc2SQu Wenruo trace_btrfs_ordered_sched(work); 26008a9ff32SQu Wenruo spin_unlock_irqrestore(lock, flags); 26108a9ff32SQu Wenruo work->ordered_func(work); 26208a9ff32SQu Wenruo 26308a9ff32SQu Wenruo /* now take the lock again and drop our item from the list */ 26408a9ff32SQu Wenruo spin_lock_irqsave(lock, flags); 26508a9ff32SQu Wenruo list_del(&work->ordered_list); 26608a9ff32SQu Wenruo spin_unlock_irqrestore(lock, flags); 26708a9ff32SQu Wenruo 26808a9ff32SQu Wenruo /* 26908a9ff32SQu Wenruo * we don't want to call the ordered free functions 27008a9ff32SQu Wenruo * with the lock held though 27108a9ff32SQu Wenruo */ 27208a9ff32SQu Wenruo work->ordered_free(work); 27352483bc2SQu Wenruo trace_btrfs_all_work_done(work); 27408a9ff32SQu Wenruo } 27508a9ff32SQu Wenruo spin_unlock_irqrestore(lock, flags); 27608a9ff32SQu Wenruo } 27708a9ff32SQu Wenruo 2789e0af237SLiu Bo static void normal_work_helper(struct btrfs_work *work) 27908a9ff32SQu Wenruo { 280d458b054SQu Wenruo struct __btrfs_workqueue *wq; 28108a9ff32SQu Wenruo int need_order = 0; 28208a9ff32SQu Wenruo 28308a9ff32SQu Wenruo /* 28408a9ff32SQu Wenruo * We should not touch things inside work in the following cases: 28508a9ff32SQu Wenruo * 1) after work->func() if it has no ordered_free 28608a9ff32SQu Wenruo * Since the struct is freed in work->func(). 28708a9ff32SQu Wenruo * 2) after setting WORK_DONE_BIT 28808a9ff32SQu Wenruo * The work may be freed in other threads almost instantly. 28908a9ff32SQu Wenruo * So we save the needed things here. 
29008a9ff32SQu Wenruo */ 29108a9ff32SQu Wenruo if (work->ordered_func) 29208a9ff32SQu Wenruo need_order = 1; 29308a9ff32SQu Wenruo wq = work->wq; 29408a9ff32SQu Wenruo 29552483bc2SQu Wenruo trace_btrfs_work_sched(work); 2960bd9289cSQu Wenruo thresh_exec_hook(wq); 29708a9ff32SQu Wenruo work->func(work); 29808a9ff32SQu Wenruo if (need_order) { 29908a9ff32SQu Wenruo set_bit(WORK_DONE_BIT, &work->flags); 30008a9ff32SQu Wenruo run_ordered_work(wq); 30108a9ff32SQu Wenruo } 30252483bc2SQu Wenruo if (!need_order) 30352483bc2SQu Wenruo trace_btrfs_all_work_done(work); 30408a9ff32SQu Wenruo } 30508a9ff32SQu Wenruo 3069e0af237SLiu Bo void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, 3076db8914fSQu Wenruo btrfs_func_t func, 3086db8914fSQu Wenruo btrfs_func_t ordered_func, 3096db8914fSQu Wenruo btrfs_func_t ordered_free) 31008a9ff32SQu Wenruo { 31108a9ff32SQu Wenruo work->func = func; 31208a9ff32SQu Wenruo work->ordered_func = ordered_func; 31308a9ff32SQu Wenruo work->ordered_free = ordered_free; 3149e0af237SLiu Bo INIT_WORK(&work->normal_work, uniq_func); 31508a9ff32SQu Wenruo INIT_LIST_HEAD(&work->ordered_list); 31608a9ff32SQu Wenruo work->flags = 0; 31708a9ff32SQu Wenruo } 31808a9ff32SQu Wenruo 319d458b054SQu Wenruo static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, 320d458b054SQu Wenruo struct btrfs_work *work) 32108a9ff32SQu Wenruo { 32208a9ff32SQu Wenruo unsigned long flags; 32308a9ff32SQu Wenruo 32408a9ff32SQu Wenruo work->wq = wq; 3250bd9289cSQu Wenruo thresh_queue_hook(wq); 32608a9ff32SQu Wenruo if (work->ordered_func) { 32708a9ff32SQu Wenruo spin_lock_irqsave(&wq->list_lock, flags); 32808a9ff32SQu Wenruo list_add_tail(&work->ordered_list, &wq->ordered_list); 32908a9ff32SQu Wenruo spin_unlock_irqrestore(&wq->list_lock, flags); 33008a9ff32SQu Wenruo } 33108a9ff32SQu Wenruo queue_work(wq->normal_wq, &work->normal_work); 33252483bc2SQu Wenruo trace_btrfs_work_queued(work); 33308a9ff32SQu Wenruo } 33408a9ff32SQu Wenruo 
335d458b054SQu Wenruo void btrfs_queue_work(struct btrfs_workqueue *wq, 336d458b054SQu Wenruo struct btrfs_work *work) 3371ca08976SQu Wenruo { 338d458b054SQu Wenruo struct __btrfs_workqueue *dest_wq; 3391ca08976SQu Wenruo 3401ca08976SQu Wenruo if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high) 3411ca08976SQu Wenruo dest_wq = wq->high; 3421ca08976SQu Wenruo else 3431ca08976SQu Wenruo dest_wq = wq->normal; 3441ca08976SQu Wenruo __btrfs_queue_work(dest_wq, work); 3451ca08976SQu Wenruo } 3461ca08976SQu Wenruo 3471ca08976SQu Wenruo static inline void 348d458b054SQu Wenruo __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq) 34908a9ff32SQu Wenruo { 35008a9ff32SQu Wenruo destroy_workqueue(wq->normal_wq); 351c3a46891SQu Wenruo trace_btrfs_workqueue_destroy(wq); 35208a9ff32SQu Wenruo kfree(wq); 35308a9ff32SQu Wenruo } 35408a9ff32SQu Wenruo 355d458b054SQu Wenruo void btrfs_destroy_workqueue(struct btrfs_workqueue *wq) 3561ca08976SQu Wenruo { 3571ca08976SQu Wenruo if (!wq) 3581ca08976SQu Wenruo return; 3591ca08976SQu Wenruo if (wq->high) 3601ca08976SQu Wenruo __btrfs_destroy_workqueue(wq->high); 3611ca08976SQu Wenruo __btrfs_destroy_workqueue(wq->normal); 362ef66af10SFilipe Manana kfree(wq); 3631ca08976SQu Wenruo } 3641ca08976SQu Wenruo 365c6dd6ea5SQu Wenruo void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int limit_active) 36608a9ff32SQu Wenruo { 367800ee224SSergei Trofimovich if (!wq) 368800ee224SSergei Trofimovich return; 369c6dd6ea5SQu Wenruo wq->normal->limit_active = limit_active; 3701ca08976SQu Wenruo if (wq->high) 371c6dd6ea5SQu Wenruo wq->high->limit_active = limit_active; 3721ca08976SQu Wenruo } 3731ca08976SQu Wenruo 374d458b054SQu Wenruo void btrfs_set_work_high_priority(struct btrfs_work *work) 3751ca08976SQu Wenruo { 3761ca08976SQu Wenruo set_bit(WORK_HIGH_PRIO_BIT, &work->flags); 37708a9ff32SQu Wenruo } 378