
1 // SPDX-License-Identifier: GPL-2.0-only
3 * fs/fs-writeback.c
14 * Additions for address_space-based writeback
28 #include <linux/backing-dev.h>
37 #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
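/*
 * Illustrative arithmetic (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12):
 * 4096UL >> (12 - 10) == 1024 pages, i.e. a 4 MiB minimum writeback chunk.
 */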
90 set_bit(WB_has_dirty_io, &wb->state); in wb_io_lists_populated()
91 WARN_ON_ONCE(!wb->avg_write_bandwidth); in wb_io_lists_populated()
92 atomic_long_add(wb->avg_write_bandwidth, in wb_io_lists_populated()
93 &wb->bdi->tot_write_bandwidth); in wb_io_lists_populated()
100 if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) && in wb_io_lists_depopulated()
101 list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) { in wb_io_lists_depopulated()
102 clear_bit(WB_has_dirty_io, &wb->state); in wb_io_lists_depopulated()
103 WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth, in wb_io_lists_depopulated()
104 &wb->bdi->tot_write_bandwidth) < 0); in wb_io_lists_depopulated()
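/*
 * Note: as the two helpers above show, bdi->tot_write_bandwidth is the sum of
 * avg_write_bandwidth over the member wbs that currently have dirty IO; a
 * wb's bandwidth is added when its dirty lists become populated and
 * subtracted once b_dirty, b_io and b_more_io are all empty again.  This
 * aggregate is what wb_split_bdi_pages() later divides work against.
 */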
109 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
112 * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
114 * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
122 assert_spin_locked(&wb->list_lock); in inode_io_list_move_locked()
123 assert_spin_locked(&inode->i_lock); in inode_io_list_move_locked()
124 WARN_ON_ONCE(inode->i_state & I_FREEING); in inode_io_list_move_locked()
126 list_move(&inode->i_io_list, head); in inode_io_list_move_locked()
129 if (head != &wb->b_dirty_time) in inode_io_list_move_locked()
138 spin_lock_irq(&wb->work_lock); in wb_wakeup()
139 if (test_bit(WB_registered, &wb->state)) in wb_wakeup()
140 mod_delayed_work(bdi_wq, &wb->dwork, 0); in wb_wakeup()
141 spin_unlock_irq(&wb->work_lock); in wb_wakeup()
147 struct wb_completion *done = work->done; in finish_writeback_work()
149 if (work->auto_free) in finish_writeback_work()
152 wait_queue_head_t *waitq = done->waitq; in finish_writeback_work()
155 if (atomic_dec_and_test(&done->cnt)) in finish_writeback_work()
165 if (work->done) in wb_queue_work()
166 atomic_inc(&work->done->cnt); in wb_queue_work()
168 spin_lock_irq(&wb->work_lock); in wb_queue_work()
170 if (test_bit(WB_registered, &wb->state)) { in wb_queue_work()
171 list_add_tail(&work->list, &wb->work_list); in wb_queue_work()
172 mod_delayed_work(bdi_wq, &wb->dwork, 0); in wb_queue_work()
176 spin_unlock_irq(&wb->work_lock); in wb_queue_work()
180 * wb_wait_for_completion - wait for completion of bdi_writeback_works
183 * Wait for one or more work items issued to @bdi with their ->done field
191 atomic_dec(&done->cnt); /* put down the initial count */ in wb_wait_for_completion()
192 wait_event(*done->waitq, !atomic_read(&done->cnt)); in wb_wait_for_completion()
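/*
 * Counting pattern (sketch of what the helpers above implement): done->cnt
 * starts at 1 on behalf of the issuer, each queued work takes an extra
 * reference in wb_queue_work() and drops it in finish_writeback_work(), and
 * the waiter releases the issuer's initial reference before sleeping until
 * the count reaches zero, i.e. until every outstanding work has completed.
 */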
209 * avoiding overly aggressive flip-flops from occasional foreign writes.
221 #define WB_FRN_HIST_SLOTS 16 /* inode->i_wb_frn_history is 16bit */
234 #define WB_MAX_INODES_PER_ISW ((1024UL - sizeof(struct inode_switch_wbs_context)) \
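/*
 * Illustrative arithmetic (assuming 8-byte pointers and a small context
 * header): the remaining space of a 1 KiB allocation is divided among inode
 * pointers, so roughly a hundred-plus inodes fit in each switch context.
 */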
260 wb = &bdi->wb; in __inode_attach_wb()
266 if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) in __inode_attach_wb()
272 * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
282 assert_spin_locked(&wb->list_lock); in inode_cgwb_move_to_attached()
283 assert_spin_locked(&inode->i_lock); in inode_cgwb_move_to_attached()
284 WARN_ON_ONCE(inode->i_state & I_FREEING); in inode_cgwb_move_to_attached()
286 inode->i_state &= ~I_SYNC_QUEUED; in inode_cgwb_move_to_attached()
287 if (wb != &wb->bdi->wb) in inode_cgwb_move_to_attached()
288 list_move(&inode->i_io_list, &wb->b_attached); in inode_cgwb_move_to_attached()
290 list_del_init(&inode->i_io_list); in inode_cgwb_move_to_attached()
295 * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
298 * Returns @inode's wb with its list_lock held. @inode->i_lock must be
304 __releases(&inode->i_lock) in locked_inode_to_wb_and_lock_list()
305 __acquires(&wb->list_lock) in locked_inode_to_wb_and_lock_list()
312 * @inode->i_lock and @wb->list_lock but list_lock nests in locked_inode_to_wb_and_lock_list()
317 spin_unlock(&inode->i_lock); in locked_inode_to_wb_and_lock_list()
318 spin_lock(&wb->list_lock); in locked_inode_to_wb_and_lock_list()
321 if (likely(wb == inode->i_wb)) { in locked_inode_to_wb_and_lock_list()
326 spin_unlock(&wb->list_lock); in locked_inode_to_wb_and_lock_list()
329 spin_lock(&inode->i_lock); in locked_inode_to_wb_and_lock_list()
334 * inode_to_wb_and_lock_list - determine an inode's wb and lock it
337 * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
341 __acquires(&wb->list_lock) in inode_to_wb_and_lock_list()
343 spin_lock(&inode->i_lock); in inode_to_wb_and_lock_list()
354 * the first part, all inode pointers are placed into a NULL-terminated
356 * an inode could be left in an inconsistent state.
364 down_write(&bdi->wb_switch_rwsem); in bdi_down_write_wb_switch_rwsem()
369 up_write(&bdi->wb_switch_rwsem); in bdi_up_write_wb_switch_rwsem()
376 struct address_space *mapping = inode->i_mapping; in inode_do_switch_wbs()
377 XA_STATE(xas, &mapping->i_pages, 0); in inode_do_switch_wbs()
381 spin_lock(&inode->i_lock); in inode_do_switch_wbs()
382 xa_lock_irq(&mapping->i_pages); in inode_do_switch_wbs()
386 * path owns the inode and we shouldn't modify ->i_io_list. in inode_do_switch_wbs()
388 if (unlikely(inode->i_state & (I_FREEING | I_WILL_FREE))) in inode_do_switch_wbs()
401 wb_stat_mod(old_wb, WB_RECLAIMABLE, -nr); in inode_do_switch_wbs()
410 wb_stat_mod(old_wb, WB_WRITEBACK, -nr); in inode_do_switch_wbs()
415 atomic_dec(&old_wb->writeback_inodes); in inode_do_switch_wbs()
416 atomic_inc(&new_wb->writeback_inodes); in inode_do_switch_wbs()
424 * ->b_dirty which is always correct including from ->b_dirty_time. in inode_do_switch_wbs()
425 * The transfer preserves @inode->dirtied_when ordering. If the @inode in inode_do_switch_wbs()
429 if (!list_empty(&inode->i_io_list)) { in inode_do_switch_wbs()
430 inode->i_wb = new_wb; in inode_do_switch_wbs()
432 if (inode->i_state & I_DIRTY_ALL) { in inode_do_switch_wbs()
435 list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) in inode_do_switch_wbs()
436 if (time_after_eq(inode->dirtied_when, in inode_do_switch_wbs()
437 pos->dirtied_when)) in inode_do_switch_wbs()
440 pos->i_io_list.prev); in inode_do_switch_wbs()
445 inode->i_wb = new_wb; in inode_do_switch_wbs()
448 /* ->i_wb_frn updates may race wbc_detach_inode() but doesn't matter */ in inode_do_switch_wbs()
449 inode->i_wb_frn_winner = 0; in inode_do_switch_wbs()
450 inode->i_wb_frn_avg_time = 0; in inode_do_switch_wbs()
451 inode->i_wb_frn_history = 0; in inode_do_switch_wbs()
458 smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH); in inode_do_switch_wbs()
460 xa_unlock_irq(&mapping->i_pages); in inode_do_switch_wbs()
461 spin_unlock(&inode->i_lock); in inode_do_switch_wbs()
470 struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]); in inode_switch_wbs_work_fn()
471 struct bdi_writeback *old_wb = isw->inodes[0]->i_wb; in inode_switch_wbs_work_fn()
472 struct bdi_writeback *new_wb = isw->new_wb; in inode_switch_wbs_work_fn()
480 down_read(&bdi->wb_switch_rwsem); in inode_switch_wbs_work_fn()
488 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock in inode_switch_wbs_work_fn()
493 spin_lock(&old_wb->list_lock); in inode_switch_wbs_work_fn()
494 spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING); in inode_switch_wbs_work_fn()
496 spin_lock(&new_wb->list_lock); in inode_switch_wbs_work_fn()
497 spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); in inode_switch_wbs_work_fn()
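/*
 * Note: the two branches above take old_wb->list_lock and new_wb->list_lock
 * in a single global order (the kernel orders them by address) so that
 * concurrent switches cannot deadlock on these locks; SINGLE_DEPTH_NESTING
 * tells lockdep that the second, nested acquisition of a lock of the same
 * class is intentional.
 */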
500 for (inodep = isw->inodes; *inodep; inodep++) { in inode_switch_wbs_work_fn()
501 WARN_ON_ONCE((*inodep)->i_wb != old_wb); in inode_switch_wbs_work_fn()
506 spin_unlock(&new_wb->list_lock); in inode_switch_wbs_work_fn()
507 spin_unlock(&old_wb->list_lock); in inode_switch_wbs_work_fn()
509 up_read(&bdi->wb_switch_rwsem); in inode_switch_wbs_work_fn()
516 for (inodep = isw->inodes; *inodep; inodep++) in inode_switch_wbs_work_fn()
538 spin_lock(&inode->i_lock); in inode_prepare_wbs_switch()
539 if (!(inode->i_sb->s_flags & SB_ACTIVE) || in inode_prepare_wbs_switch()
540 inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) || in inode_prepare_wbs_switch()
542 spin_unlock(&inode->i_lock); in inode_prepare_wbs_switch()
545 inode->i_state |= I_WB_SWITCH; in inode_prepare_wbs_switch()
547 spin_unlock(&inode->i_lock); in inode_prepare_wbs_switch()
553 * inode_switch_wbs - change the wb association of an inode
567 if (inode->i_state & I_WB_SWITCH) in inode_switch_wbs()
589 isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); in inode_switch_wbs()
591 if (!isw->new_wb) in inode_switch_wbs()
594 if (!inode_prepare_wbs_switch(inode, isw->new_wb)) in inode_switch_wbs()
597 isw->inodes[0] = inode; in inode_switch_wbs()
605 INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); in inode_switch_wbs()
606 queue_rcu_work(isw_wq, &isw->work); in inode_switch_wbs()
611 if (isw->new_wb) in inode_switch_wbs()
612 wb_put(isw->new_wb); in inode_switch_wbs()
622 if (!inode_prepare_wbs_switch(inode, isw->new_wb)) in isw_prepare_wbs_switch()
625 isw->inodes[*nr] = inode; in isw_prepare_wbs_switch()
628 if (*nr >= WB_MAX_INODES_PER_ISW - 1) in isw_prepare_wbs_switch()
635 * cleanup_offline_cgwb - detach associated inodes
656 for (memcg_css = wb->memcg_css->parent; memcg_css; in cleanup_offline_cgwb()
657 memcg_css = memcg_css->parent) { in cleanup_offline_cgwb()
658 isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL); in cleanup_offline_cgwb()
659 if (isw->new_wb) in cleanup_offline_cgwb()
662 if (unlikely(!isw->new_wb)) in cleanup_offline_cgwb()
663 isw->new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */ in cleanup_offline_cgwb()
666 spin_lock(&wb->list_lock); in cleanup_offline_cgwb()
675 restart = isw_prepare_wbs_switch(isw, &wb->b_attached, &nr); in cleanup_offline_cgwb()
677 restart = isw_prepare_wbs_switch(isw, &wb->b_dirty_time, &nr); in cleanup_offline_cgwb()
678 spin_unlock(&wb->list_lock); in cleanup_offline_cgwb()
683 wb_put(isw->new_wb); in cleanup_offline_cgwb()
694 INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); in cleanup_offline_cgwb()
695 queue_rcu_work(isw_wq, &isw->work); in cleanup_offline_cgwb()
701 * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
714 spin_unlock(&inode->i_lock); in wbc_attach_and_unlock_inode()
718 wbc->wb = inode_to_wb(inode); in wbc_attach_and_unlock_inode()
719 wbc->inode = inode; in wbc_attach_and_unlock_inode()
721 wbc->wb_id = wbc->wb->memcg_css->id; in wbc_attach_and_unlock_inode()
722 wbc->wb_lcand_id = inode->i_wb_frn_winner; in wbc_attach_and_unlock_inode()
723 wbc->wb_tcand_id = 0; in wbc_attach_and_unlock_inode()
724 wbc->wb_bytes = 0; in wbc_attach_and_unlock_inode()
725 wbc->wb_lcand_bytes = 0; in wbc_attach_and_unlock_inode()
726 wbc->wb_tcand_bytes = 0; in wbc_attach_and_unlock_inode()
728 wb_get(wbc->wb); in wbc_attach_and_unlock_inode()
729 spin_unlock(&inode->i_lock); in wbc_attach_and_unlock_inode()
738 if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css))) in wbc_attach_and_unlock_inode()
739 inode_switch_wbs(inode, wbc->wb_id); in wbc_attach_and_unlock_inode()
744 * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
751 * memcg only tracks page ownership on a first-use basis, severely confining
753 * per-inode. While the support for concurrent write sharing of an inode
756 * charging only by first-use can too readily lead to grossly incorrect
767 * using Boyer-Moore majority vote algorithm. In addition to the byte
771 * candidate). Keeping track of the historical winner helps the algorithm
772 * to semi-reliably detect the most active writer even when it's not the
777 * inode->i_wb_frn_history. If the amount of recorded foreign IO time is
782 struct bdi_writeback *wb = wbc->wb; in wbc_detach_inode()
783 struct inode *inode = wbc->inode; in wbc_detach_inode()
791 history = inode->i_wb_frn_history; in wbc_detach_inode()
792 avg_time = inode->i_wb_frn_avg_time; in wbc_detach_inode()
795 if (wbc->wb_bytes >= wbc->wb_lcand_bytes && in wbc_detach_inode()
796 wbc->wb_bytes >= wbc->wb_tcand_bytes) { in wbc_detach_inode()
797 max_id = wbc->wb_id; in wbc_detach_inode()
798 max_bytes = wbc->wb_bytes; in wbc_detach_inode()
799 } else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) { in wbc_detach_inode()
800 max_id = wbc->wb_lcand_id; in wbc_detach_inode()
801 max_bytes = wbc->wb_lcand_bytes; in wbc_detach_inode()
803 max_id = wbc->wb_tcand_id; in wbc_detach_inode()
804 max_bytes = wbc->wb_tcand_bytes; in wbc_detach_inode()
811 * deciding whether to switch or not. This is to prevent one-off in wbc_detach_inode()
815 wb->avg_write_bandwidth); in wbc_detach_inode()
817 avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) - in wbc_detach_inode()
836 if (wbc->wb_id != max_id) in wbc_detach_inode()
837 history |= (1U << slots) - 1; in wbc_detach_inode()
857 inode->i_wb_frn_winner = max_id; in wbc_detach_inode()
858 inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX); in wbc_detach_inode()
859 inode->i_wb_frn_history = history; in wbc_detach_inode()
861 wb_put(wbc->wb); in wbc_detach_inode()
862 wbc->wb = NULL; in wbc_detach_inode()
867 * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
889 if (!wbc->wb || wbc->no_cgroup_owner) in wbc_account_cgroup_owner()
895 if (!(css->flags & CSS_ONLINE)) in wbc_account_cgroup_owner()
898 id = css->id; in wbc_account_cgroup_owner()
900 if (id == wbc->wb_id) { in wbc_account_cgroup_owner()
901 wbc->wb_bytes += bytes; in wbc_account_cgroup_owner()
905 if (id == wbc->wb_lcand_id) in wbc_account_cgroup_owner()
906 wbc->wb_lcand_bytes += bytes; in wbc_account_cgroup_owner()
908 /* Boyer-Moore majority vote algorithm */ in wbc_account_cgroup_owner()
909 if (!wbc->wb_tcand_bytes) in wbc_account_cgroup_owner()
910 wbc->wb_tcand_id = id; in wbc_account_cgroup_owner()
911 if (id == wbc->wb_tcand_id) in wbc_account_cgroup_owner()
912 wbc->wb_tcand_bytes += bytes; in wbc_account_cgroup_owner()
914 wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); in wbc_account_cgroup_owner()
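/*
 * Minimal standalone sketch of the Boyer-Moore majority vote used above:
 * keep one candidate and a counter, increment on a match, decrement on a
 * mismatch, and install a new candidate whenever the counter hits zero.
 * The helper below is illustrative only, not a kernel API.
 */
static int majority_candidate(const int *ids, int n)
{
        int cand = -1, votes = 0, i;

        for (i = 0; i < n; i++) {
                if (!votes)
                        cand = ids[i];
                if (ids[i] == cand)
                        votes++;
                else
                        votes--;
        }
        return cand;    /* majority element if one exists, arbitrary otherwise */
}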
919 * wb_split_bdi_pages - split nr_pages to write according to bandwidth
925 * @wb->bdi.
929 unsigned long this_bw = wb->avg_write_bandwidth; in wb_split_bdi_pages()
930 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); in wb_split_bdi_pages()
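/*
 * Sketch of the proportional split that wb_split_bdi_pages() performs
 * (illustrative, not the kernel code): each wb receives a share of the page
 * budget proportional to its average write bandwidth, rounded up so that an
 * active wb is never handed zero pages.  A real implementation would also
 * guard the multiplication against overflow, as the kernel does with 64-bit
 * math.
 */
static unsigned long split_pages(unsigned long nr_pages,
                                 unsigned long this_bw, unsigned long tot_bw)
{
        if (!tot_bw || this_bw >= tot_bw)
                return nr_pages;        /* no meaningful proportion: write more */
        return (nr_pages * this_bw + tot_bw - 1) / tot_bw;      /* round up */
}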
947 * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
953 * have dirty inodes. If @base_work->nr_pages isn't %LONG_MAX, it's
962 struct bdi_writeback *wb = list_entry(&bdi->wb_list, in bdi_split_work_to_wbs()
968 list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) { in bdi_split_work_to_wbs()
981 (base_work->sync_mode == WB_SYNC_NONE || in bdi_split_work_to_wbs()
982 list_empty(&wb->b_dirty_time))) in bdi_split_work_to_wbs()
987 nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages); in bdi_split_work_to_wbs()
992 work->nr_pages = nr_pages; in bdi_split_work_to_wbs()
993 work->auto_free = 1; in bdi_split_work_to_wbs()
1001 * Pin @wb so that it stays on @bdi->wb_list. This allows in bdi_split_work_to_wbs()
1008 /* alloc failed, execute synchronously using on-stack fallback */ in bdi_split_work_to_wbs()
1011 work->nr_pages = nr_pages; in bdi_split_work_to_wbs()
1012 work->auto_free = 0; in bdi_split_work_to_wbs()
1013 work->done = &fallback_work_done; in bdi_split_work_to_wbs()
1029 * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
1051 return -ENOENT; in cgroup_writeback_by_id()
1059 ret = -ENOENT; in cgroup_writeback_by_id()
1069 ret = -ENOENT; in cgroup_writeback_by_id()
1080 * BTW the memcg stats are flushed periodically and this is best-effort in cgroup_writeback_by_id()
1089 work->nr_pages = dirty; in cgroup_writeback_by_id()
1090 work->sync_mode = WB_SYNC_NONE; in cgroup_writeback_by_id()
1091 work->range_cyclic = 1; in cgroup_writeback_by_id()
1092 work->reason = reason; in cgroup_writeback_by_id()
1093 work->done = done; in cgroup_writeback_by_id()
1094 work->auto_free = 1; in cgroup_writeback_by_id()
1098 ret = -ENOMEM; in cgroup_writeback_by_id()
1110 * cgroup_writeback_umount - flush inode wb switches for umount
1113 * flushes in-flight inode wb switches. An inode wb switch goes through
1130 * ensure that all in-flight wb switches are in the workqueue. in cgroup_writeback_umount()
1141 return -ENOMEM; in cgroup_writeback_init()
1154 assert_spin_locked(&wb->list_lock); in inode_cgwb_move_to_attached()
1155 assert_spin_locked(&inode->i_lock); in inode_cgwb_move_to_attached()
1156 WARN_ON_ONCE(inode->i_state & I_FREEING); in inode_cgwb_move_to_attached()
1158 inode->i_state &= ~I_SYNC_QUEUED; in inode_cgwb_move_to_attached()
1159 list_del_init(&inode->i_io_list); in inode_cgwb_move_to_attached()
1165 __releases(&inode->i_lock) in locked_inode_to_wb_and_lock_list()
1166 __acquires(&wb->list_lock) in locked_inode_to_wb_and_lock_list()
1170 spin_unlock(&inode->i_lock); in locked_inode_to_wb_and_lock_list()
1171 spin_lock(&wb->list_lock); in locked_inode_to_wb_and_lock_list()
1176 __acquires(&wb->list_lock) in inode_to_wb_and_lock_list()
1180 spin_lock(&wb->list_lock); in inode_to_wb_and_lock_list()
1195 if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) { in bdi_split_work_to_wbs()
1196 base_work->auto_free = 0; in bdi_split_work_to_wbs()
1197 wb_queue_work(&bdi->wb, base_work); in bdi_split_work_to_wbs()
1226 if (test_bit(WB_start_all, &wb->state) || in wb_start_writeback()
1227 test_and_set_bit(WB_start_all, &wb->state)) in wb_start_writeback()
1230 wb->start_all_reason = reason; in wb_start_writeback()
1235 * wb_start_background_writeback - start background writeback
1262 spin_lock(&inode->i_lock); in inode_io_list_del()
1264 inode->i_state &= ~I_SYNC_QUEUED; in inode_io_list_del()
1265 list_del_init(&inode->i_io_list); in inode_io_list_del()
1268 spin_unlock(&inode->i_lock); in inode_io_list_del()
1269 spin_unlock(&wb->list_lock); in inode_io_list_del()
1278 struct super_block *sb = inode->i_sb; in sb_mark_inode_writeback()
1281 if (list_empty(&inode->i_wb_list)) { in sb_mark_inode_writeback()
1282 spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); in sb_mark_inode_writeback()
1283 if (list_empty(&inode->i_wb_list)) { in sb_mark_inode_writeback()
1284 list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb); in sb_mark_inode_writeback()
1287 spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); in sb_mark_inode_writeback()
1296 struct super_block *sb = inode->i_sb; in sb_clear_inode_writeback()
1299 if (!list_empty(&inode->i_wb_list)) { in sb_clear_inode_writeback()
1300 spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); in sb_clear_inode_writeback()
1301 if (!list_empty(&inode->i_wb_list)) { in sb_clear_inode_writeback()
1302 list_del_init(&inode->i_wb_list); in sb_clear_inode_writeback()
1305 spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); in sb_clear_inode_writeback()
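/*
 * Note on the pattern used by sb_mark_inode_writeback() and
 * sb_clear_inode_writeback() above: the list_empty() check is done once
 * without the lock as a cheap fast path and repeated under
 * s_inode_wblist_lock before i_wb_list is actually modified, so the common
 * already-on-list / already-off-list cases avoid taking the lock at all.
 */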
1310 * Redirty an inode: set its when-it-was-dirtied timestamp and move it to the
1311 * furthest end of its superblock's dirty-inode list.
1313 * Before stamping the inode's ->dirtied_when, we check to see whether it is
1314 * already the most-recently-dirtied inode on the b_dirty list. If that is
1320 assert_spin_locked(&inode->i_lock); in redirty_tail_locked()
1322 inode->i_state &= ~I_SYNC_QUEUED; in redirty_tail_locked()
1328 if (inode->i_state & I_FREEING) { in redirty_tail_locked()
1329 list_del_init(&inode->i_io_list); in redirty_tail_locked()
1333 if (!list_empty(&wb->b_dirty)) { in redirty_tail_locked()
1336 tail = wb_inode(wb->b_dirty.next); in redirty_tail_locked()
1337 if (time_before(inode->dirtied_when, tail->dirtied_when)) in redirty_tail_locked()
1338 inode->dirtied_when = jiffies; in redirty_tail_locked()
1340 inode_io_list_move_locked(inode, wb, &wb->b_dirty); in redirty_tail_locked()
1345 spin_lock(&inode->i_lock); in redirty_tail()
1347 spin_unlock(&inode->i_lock); in redirty_tail()
1351 * requeue inode for re-scanning after bdi->b_io list is exhausted.
1355 inode_io_list_move_locked(inode, wb, &wb->b_more_io); in requeue_io()
1360 inode->i_state &= ~I_SYNC; in inode_sync_complete()
1365 wake_up_bit(&inode->i_state, __I_SYNC); in inode_sync_complete()
1370 bool ret = time_after(inode->dirtied_when, t); in inode_dirtied_after()
1375 * This test is necessary to prevent such wrapped-around relative times in inode_dirtied_after()
1378 ret = ret && time_before_eq(inode->dirtied_when, jiffies); in inode_dirtied_after()
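/*
 * time_after()/time_before() compare jiffies using signed subtraction so
 * that values which have wrapped around still order correctly.  A simplified
 * userspace equivalent (without the kernel's typechecking):
 */
#define my_time_after(a, b)     ((long)((b) - (a)) < 0)
#define my_time_before(a, b)    my_time_after(b, a)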
1399 inode = wb_inode(delaying_queue->prev); in move_expired_inodes()
1402 spin_lock(&inode->i_lock); in move_expired_inodes()
1403 list_move(&inode->i_io_list, &tmp); in move_expired_inodes()
1405 inode->i_state |= I_SYNC_QUEUED; in move_expired_inodes()
1406 spin_unlock(&inode->i_lock); in move_expired_inodes()
1407 if (sb_is_blkdev_sb(inode->i_sb)) in move_expired_inodes()
1409 if (sb && sb != inode->i_sb) in move_expired_inodes()
1411 sb = inode->i_sb; in move_expired_inodes()
1422 * we don't take inode->i_lock here because it is just a pointless overhead. in move_expired_inodes()
1427 sb = wb_inode(tmp.prev)->i_sb; in move_expired_inodes()
1430 if (inode->i_sb == sb) in move_expired_inodes()
1431 list_move(&inode->i_io_list, dispatch_queue); in move_expired_inodes()
1447 * +--> dequeue for IO
1455 assert_spin_locked(&wb->list_lock); in queue_io()
1456 list_splice_init(&wb->b_more_io, &wb->b_io); in queue_io()
1457 moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before); in queue_io()
1458 if (!work->for_sync) in queue_io()
1459 time_expire_jif = jiffies - dirtytime_expire_interval * HZ; in queue_io()
1460 moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, in queue_io()
1471 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) { in write_inode()
1473 ret = inode->i_sb->s_op->write_inode(inode, wbc); in write_inode()
1485 __releases(inode->i_lock) in __inode_wait_for_writeback()
1486 __acquires(inode->i_lock) in __inode_wait_for_writeback()
1488 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); in __inode_wait_for_writeback()
1491 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); in __inode_wait_for_writeback()
1492 while (inode->i_state & I_SYNC) { in __inode_wait_for_writeback()
1493 spin_unlock(&inode->i_lock); in __inode_wait_for_writeback()
1496 spin_lock(&inode->i_lock); in __inode_wait_for_writeback()
1505 spin_lock(&inode->i_lock); in inode_wait_for_writeback()
1507 spin_unlock(&inode->i_lock); in inode_wait_for_writeback()
1516 __releases(inode->i_lock) in inode_sleep_on_writeback()
1519 wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC); in inode_sleep_on_writeback()
1523 sleep = inode->i_state & I_SYNC; in inode_sleep_on_writeback()
1524 spin_unlock(&inode->i_lock); in inode_sleep_on_writeback()
1534 * inodes. This function can be called only by the flusher thread - no one else
1541 if (inode->i_state & I_FREEING) in requeue_inode()
1549 if ((inode->i_state & I_DIRTY) && in requeue_inode()
1550 (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) in requeue_inode()
1551 inode->dirtied_when = jiffies; in requeue_inode()
1553 if (wbc->pages_skipped) { in requeue_inode()
1560 if (inode->i_state & I_DIRTY_ALL) in requeue_inode()
1567 if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { in requeue_inode()
1572 if (wbc->nr_to_write <= 0) { in requeue_inode()
1585 } else if (inode->i_state & I_DIRTY) { in requeue_inode()
1592 } else if (inode->i_state & I_DIRTY_TIME) { in requeue_inode()
1593 inode->dirtied_when = jiffies; in requeue_inode()
1594 inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); in requeue_inode()
1595 inode->i_state &= ~I_SYNC_QUEUED; in requeue_inode()
1604 * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state.
1616 struct address_space *mapping = inode->i_mapping; in __writeback_single_inode()
1617 long nr_to_write = wbc->nr_to_write; in __writeback_single_inode()
1621 WARN_ON(!(inode->i_state & I_SYNC)); in __writeback_single_inode()
1631 * separate, external IO completion path and ->sync_fs for guaranteeing in __writeback_single_inode()
1634 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { in __writeback_single_inode()
1645 if ((inode->i_state & I_DIRTY_TIME) && in __writeback_single_inode()
1646 (wbc->sync_mode == WB_SYNC_ALL || in __writeback_single_inode()
1647 time_after(jiffies, inode->dirtied_time_when + in __writeback_single_inode()
1659 spin_lock(&inode->i_lock); in __writeback_single_inode()
1660 dirty = inode->i_state & I_DIRTY; in __writeback_single_inode()
1661 inode->i_state &= ~dirty; in __writeback_single_inode()
1665 * __mark_inode_dirty() to test i_state without grabbing i_lock - in __writeback_single_inode()
1677 inode->i_state |= I_DIRTY_PAGES; in __writeback_single_inode()
1678 else if (unlikely(inode->i_state & I_PINNING_FSCACHE_WB)) { in __writeback_single_inode()
1679 if (!(inode->i_state & I_DIRTY_PAGES)) { in __writeback_single_inode()
1680 inode->i_state &= ~I_PINNING_FSCACHE_WB; in __writeback_single_inode()
1681 wbc->unpinned_fscache_wb = true; in __writeback_single_inode()
1686 spin_unlock(&inode->i_lock); in __writeback_single_inode()
1694 wbc->unpinned_fscache_wb = false; in __writeback_single_inode()
1700 * Write out an inode's dirty data and metadata on-demand, i.e. separately from
1703 * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE).
1714 spin_lock(&inode->i_lock); in writeback_single_inode()
1715 if (!atomic_read(&inode->i_count)) in writeback_single_inode()
1716 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); in writeback_single_inode()
1718 WARN_ON(inode->i_state & I_WILL_FREE); in writeback_single_inode()
1720 if (inode->i_state & I_SYNC) { in writeback_single_inode()
1727 if (wbc->sync_mode != WB_SYNC_ALL) in writeback_single_inode()
1731 WARN_ON(inode->i_state & I_SYNC); in writeback_single_inode()
1735 * For data-integrity syncs we also need to check whether any pages are in writeback_single_inode()
1739 if (!(inode->i_state & I_DIRTY_ALL) && in writeback_single_inode()
1740 (wbc->sync_mode != WB_SYNC_ALL || in writeback_single_inode()
1741 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) in writeback_single_inode()
1743 inode->i_state |= I_SYNC; in writeback_single_inode()
1751 spin_lock(&inode->i_lock); in writeback_single_inode()
1756 if (!(inode->i_state & I_FREEING)) { in writeback_single_inode()
1762 if (!(inode->i_state & I_DIRTY_ALL)) in writeback_single_inode()
1764 else if (!(inode->i_state & I_SYNC_QUEUED)) { in writeback_single_inode()
1765 if ((inode->i_state & I_DIRTY)) in writeback_single_inode()
1767 else if (inode->i_state & I_DIRTY_TIME) { in writeback_single_inode()
1768 inode->dirtied_when = jiffies; in writeback_single_inode()
1771 &wb->b_dirty_time); in writeback_single_inode()
1776 spin_unlock(&wb->list_lock); in writeback_single_inode()
1779 spin_unlock(&inode->i_lock); in writeback_single_inode()
1801 if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) in writeback_chunk_size()
1804 pages = min(wb->avg_write_bandwidth / 2, in writeback_chunk_size()
1806 pages = min(pages, work->nr_pages); in writeback_chunk_size()
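/*
 * Sketch of the chunk-sizing heuristic above (illustrative only): for
 * non-integrity writeback the per-inode chunk is capped at half the measured
 * write bandwidth and at the remaining page budget, then rounded to a
 * minimum granule so that very small chunks are avoided.
 */
static long chunk_pages(long avg_bw, long budget, long granule)
{
        long pages = avg_bw / 2;

        if (pages > budget)
                pages = budget;
        /* round_down(pages + granule, granule): never less than one granule */
        return (pages + granule) / granule * granule;
}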
1819 * NOTE! This is called with wb->list_lock held, and will
1828 .sync_mode = work->sync_mode, in writeback_sb_inodes()
1829 .tagged_writepages = work->tagged_writepages, in writeback_sb_inodes()
1830 .for_kupdate = work->for_kupdate, in writeback_sb_inodes()
1831 .for_background = work->for_background, in writeback_sb_inodes()
1832 .for_sync = work->for_sync, in writeback_sb_inodes()
1833 .range_cyclic = work->range_cyclic, in writeback_sb_inodes()
1841 while (!list_empty(&wb->b_io)) { in writeback_sb_inodes()
1842 struct inode *inode = wb_inode(wb->b_io.prev); in writeback_sb_inodes()
1846 if (inode->i_sb != sb) { in writeback_sb_inodes()
1847 if (work->sb) { in writeback_sb_inodes()
1870 spin_lock(&inode->i_lock); in writeback_sb_inodes()
1871 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { in writeback_sb_inodes()
1873 spin_unlock(&inode->i_lock); in writeback_sb_inodes()
1876 if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) { in writeback_sb_inodes()
1879 * doing writeback-for-data-integrity, move it to in writeback_sb_inodes()
1887 spin_unlock(&inode->i_lock); in writeback_sb_inodes()
1891 spin_unlock(&wb->list_lock); in writeback_sb_inodes()
1898 if (inode->i_state & I_SYNC) { in writeback_sb_inodes()
1902 spin_lock(&wb->list_lock); in writeback_sb_inodes()
1905 inode->i_state |= I_SYNC; in writeback_sb_inodes()
1919 work->nr_pages -= write_chunk - wbc.nr_to_write; in writeback_sb_inodes()
1920 wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; in writeback_sb_inodes()
1933 blk_flush_plug(current->plug, false); in writeback_sb_inodes()
1942 spin_lock(&inode->i_lock); in writeback_sb_inodes()
1943 if (!(inode->i_state & I_DIRTY_ALL)) in writeback_sb_inodes()
1947 spin_unlock(&inode->i_lock); in writeback_sb_inodes()
1950 spin_unlock(&tmp_wb->list_lock); in writeback_sb_inodes()
1951 spin_lock(&wb->list_lock); in writeback_sb_inodes()
1961 if (work->nr_pages <= 0) in writeback_sb_inodes()
1974 while (!list_empty(&wb->b_io)) { in __writeback_inodes_wb()
1975 struct inode *inode = wb_inode(wb->b_io.prev); in __writeback_inodes_wb()
1976 struct super_block *sb = inode->i_sb; in __writeback_inodes_wb()
1988 up_read(&sb->s_umount); in __writeback_inodes_wb()
1994 if (work->nr_pages <= 0) in __writeback_inodes_wb()
2014 spin_lock(&wb->list_lock); in writeback_inodes_wb()
2015 if (list_empty(&wb->b_io)) in writeback_inodes_wb()
2018 spin_unlock(&wb->list_lock); in writeback_inodes_wb()
2021 return nr_pages - work.nr_pages; in writeback_inodes_wb()
2028 * dirtying-time in the inode's address_space. So this periodic writeback code
2034 * one-second gap.
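/*
 * For reference (tunable defaults, assumed here rather than taken from this
 * file): the flusher normally wakes every dirty_writeback_centisecs (500,
 * i.e. 5 s) and expires inodes dirtied more than dirty_expire_centisecs
 * (3000, i.e. 30 s) ago; both are adjustable under /proc/sys/vm/.
 */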
2042 long nr_pages = work->nr_pages; in wb_writeback()
2054 if (work->nr_pages <= 0) in wb_writeback()
2058 * Background writeout and kupdate-style writeback may in wb_writeback()
2063 if ((work->for_background || work->for_kupdate) && in wb_writeback()
2064 !list_empty(&wb->work_list)) in wb_writeback()
2071 if (work->for_background && !wb_over_bg_thresh(wb)) in wb_writeback()
2075 spin_lock(&wb->list_lock); in wb_writeback()
2083 if (work->for_kupdate) { in wb_writeback()
2084 dirtied_before = jiffies - in wb_writeback()
2086 } else if (work->for_background) in wb_writeback()
2090 if (list_empty(&wb->b_io)) { in wb_writeback()
2094 if (work->sb) in wb_writeback()
2095 progress = writeback_sb_inodes(work->sb, wb, work); in wb_writeback()
2109 spin_unlock(&wb->list_lock); in wb_writeback()
2116 if (list_empty(&wb->b_more_io)) { in wb_writeback()
2117 spin_unlock(&wb->list_lock); in wb_writeback()
2127 inode = wb_inode(wb->b_more_io.prev); in wb_writeback()
2128 spin_lock(&inode->i_lock); in wb_writeback()
2129 spin_unlock(&wb->list_lock); in wb_writeback()
2135 return nr_pages - work->nr_pages; in wb_writeback()
2145 spin_lock_irq(&wb->work_lock); in get_next_work_item()
2146 if (!list_empty(&wb->work_list)) { in get_next_work_item()
2147 work = list_entry(wb->work_list.next, in get_next_work_item()
2149 list_del_init(&work->list); in get_next_work_item()
2151 spin_unlock_irq(&wb->work_lock); in get_next_work_item()
2184 expired = wb->last_old_flush + in wb_check_old_data_flush()
2189 wb->last_old_flush = jiffies; in wb_check_old_data_flush()
2211 if (!test_bit(WB_start_all, &wb->state)) in wb_check_start_all()
2220 .reason = wb->start_all_reason, in wb_check_start_all()
2226 clear_bit(WB_start_all, &wb->state); in wb_check_start_all()
2239 set_bit(WB_writeback_running, &wb->state); in wb_do_writeback()
2247 * Check for a flush-everything request in wb_do_writeback()
2256 clear_bit(WB_writeback_running, &wb->state); in wb_do_writeback()
2271 set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); in wb_workfn()
2274 !test_bit(WB_registered, &wb->state))) { in wb_workfn()
2284 } while (!list_empty(&wb->work_list)); in wb_workfn()
2296 if (!list_empty(&wb->work_list)) in wb_workfn()
2314 list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) in __wakeup_flusher_threads_bdi()
2336 blk_flush_plug(current->plug, true); in wakeup_flusher_threads()
2370 list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) in wakeup_dirtytime_writeback()
2371 if (!list_empty(&wb->b_dirty_time)) in wakeup_dirtytime_writeback()
2397 * __mark_inode_dirty - internal function to mark an inode dirty
2416 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
2417 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
2418 * the kernel-internal blockdev inode represents the dirtying time of the
2420 * page->mapping->host, so the page-dirtying time is recorded in the internal
2425 struct super_block *sb = inode->i_sb; in __mark_inode_dirty()
2434 * We tell ->dirty_inode callback that timestamps need to in __mark_inode_dirty()
2437 if (inode->i_state & I_DIRTY_TIME) { in __mark_inode_dirty()
2438 spin_lock(&inode->i_lock); in __mark_inode_dirty()
2439 if (inode->i_state & I_DIRTY_TIME) { in __mark_inode_dirty()
2440 inode->i_state &= ~I_DIRTY_TIME; in __mark_inode_dirty()
2443 spin_unlock(&inode->i_lock); in __mark_inode_dirty()
2448 * (if needed) it can update on-disk fields and journal the in __mark_inode_dirty()
2454 if (sb->s_op->dirty_inode) in __mark_inode_dirty()
2455 sb->s_op->dirty_inode(inode, in __mark_inode_dirty()
2477 if ((inode->i_state & flags) == flags) in __mark_inode_dirty()
2480 spin_lock(&inode->i_lock); in __mark_inode_dirty()
2481 if ((inode->i_state & flags) != flags) { in __mark_inode_dirty()
2482 const int was_dirty = inode->i_state & I_DIRTY; in __mark_inode_dirty()
2486 inode->i_state |= flags; in __mark_inode_dirty()
2496 spin_lock(&inode->i_lock); in __mark_inode_dirty()
2505 if (inode->i_state & I_SYNC_QUEUED) in __mark_inode_dirty()
2512 if (!S_ISBLK(inode->i_mode)) { in __mark_inode_dirty()
2516 if (inode->i_state & I_FREEING) in __mark_inode_dirty()
2521 * reposition it (that would break b_dirty time-ordering). in __mark_inode_dirty()
2527 inode->dirtied_when = jiffies; in __mark_inode_dirty()
2529 inode->dirtied_time_when = jiffies; in __mark_inode_dirty()
2531 if (inode->i_state & I_DIRTY) in __mark_inode_dirty()
2532 dirty_list = &wb->b_dirty; in __mark_inode_dirty()
2534 dirty_list = &wb->b_dirty_time; in __mark_inode_dirty()
2539 spin_unlock(&wb->list_lock); in __mark_inode_dirty()
2540 spin_unlock(&inode->i_lock); in __mark_inode_dirty()
2545 * we have to wake up the corresponding bdi thread in __mark_inode_dirty()
2546 * to make sure background write-back happens in __mark_inode_dirty()
2550 (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) in __mark_inode_dirty()
2557 spin_unlock(&wb->list_lock); in __mark_inode_dirty()
2558 spin_unlock(&inode->i_lock); in __mark_inode_dirty()
2579 WARN_ON(!rwsem_is_locked(&sb->s_umount)); in wait_sb_inodes()
2581 mutex_lock(&sb->s_sync_lock); in wait_sb_inodes()
2588 * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as in wait_sb_inodes()
2593 spin_lock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2594 list_splice_init(&sb->s_inodes_wb, &sync_list); in wait_sb_inodes()
2606 struct address_space *mapping = inode->i_mapping; in wait_sb_inodes()
2614 list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb); in wait_sb_inodes()
2617 * The mapping can appear untagged while still on-list since we in wait_sb_inodes()
2624 spin_unlock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2626 spin_lock(&inode->i_lock); in wait_sb_inodes()
2627 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { in wait_sb_inodes()
2628 spin_unlock(&inode->i_lock); in wait_sb_inodes()
2630 spin_lock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2634 spin_unlock(&inode->i_lock); in wait_sb_inodes()
2649 spin_lock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2651 spin_unlock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2653 mutex_unlock(&sb->s_sync_lock); in wait_sb_inodes()
2659 struct backing_dev_info *bdi = sb->s_bdi; in __writeback_inodes_sb_nr()
2672 WARN_ON(!rwsem_is_locked(&sb->s_umount)); in __writeback_inodes_sb_nr()
2674 bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy); in __writeback_inodes_sb_nr()
2679 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
2697 * writeback_inodes_sb - writeback dirty inodes from given super_block
2712 * try_to_writeback_inodes_sb - try to start writeback if none underway
2720 if (!down_read_trylock(&sb->s_umount)) in try_to_writeback_inodes_sb()
2724 up_read(&sb->s_umount); in try_to_writeback_inodes_sb()
2729 * sync_inodes_sb - sync sb inode pages
2737 struct backing_dev_info *bdi = sb->s_bdi; in sync_inodes_sb()
2756 WARN_ON(!rwsem_is_locked(&sb->s_umount)); in sync_inodes_sb()
2769 * write_inode_now - write an inode to disk
2787 if (!mapping_can_writeback(inode->i_mapping)) in write_inode_now()
2796 * sync_inode_metadata - write an inode to disk
2808 .nr_to_write = 0, /* metadata-only */ in sync_inode_metadata()