// SPDX-License-Identifier: GPL-2.0

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "block-group.h"
#include "discard.h"
#include "free-space-cache.h"
#include "fs.h"

/*
 * This file contains the logic to handle async discard.
 *
 * Async discard manages trimming of free space outside of transaction commit.
 * Discarding is done by managing the block_groups on a LRU list based on free
 * space recency.  Two passes are used to first prioritize discarding extents
 * and then trim the bitmaps, giving free regions the best opportunity to
 * coalesce.  The block_groups are maintained on multiple lists to allow for
 * multiple passes with different discard filter requirements.  A delayed work
 * item is used to manage discarding with a timeout determined by a max of the
 * delay incurred by the iops rate limit, the byte rate limit, and the max
 * delay of BTRFS_DISCARD_MAX_DELAY.
 *
 * Note, this only keeps track of block_groups that are explicitly for data.
 * Mixed block_groups are not supported.
 *
 * The first list is special to manage discarding of fully free block groups.
 * This is necessary because we issue a final trim for a fully free block group
 * after forgetting it.  When a block group becomes unused, instead of directly
 * being added to the unused_bgs list, we add it to this first list.  Then
 * from there, if it becomes fully discarded, we place it onto the unused_bgs
 * list.
 *
 * The in-memory free space cache serves as the backing state for discard.
 * Consequently this means there is no persistence.  We opt to load all the
 * block groups in as not discarded, so the mount case degenerates to the
 * crashing case.
 *
 * As the free space cache uses bitmaps, there exists a tradeoff between
 * ease/efficiency for find_free_extent() and the accuracy of discard state.
 * Here we opt to let untrimmed regions merge with everything while only letting
 * trimmed regions merge with other trimmed regions.  This can cause
 * overtrimming, but the coalescing benefit seems to be worth it.  Additionally,
 * bitmap state is tracked as a whole.  If we're able to fully trim a bitmap,
 * the trimmed flag is set on the bitmap.  Otherwise, if an allocation comes in,
 * this resets the state and we will retry trimming the whole bitmap.  This is a
 * tradeoff between discard state accuracy and the cost of accounting.
 */
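
/*
 * A rough sketch of how the limits above combine (illustrative only, using
 * the constants and helpers defined below): btrfs_discard_calc_delay()
 * derives a base delay from the iops limit, and
 * __btrfs_discard_schedule_work() then takes the max against the other
 * limits, roughly:
 *
 *	base_ms = clamp(MSEC_PER_SEC / iops_limit, 1, 1000);
 *	delay   = max3(base_ms * NSEC_PER_MSEC,
 *		       prev_discard * NSEC_PER_SEC / (kbps_limit * SZ_1K),
 *		       discard_eligible_time - now);
 *
 * so whichever of the iops limit, the byte rate limit, or the block group's
 * eligibility timeout is most restrictive wins.
 */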

/* This is an initial delay to give some chance for block reuse */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(1000U)

/* Monotonically decreasing minimum length filters after index 0 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};
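
/*
 * Illustrative mapping of the filters above (a sketch, not extra logic):
 * trimming list i only discards regions of at least discard_minlen[i]
 * bytes.  A freed region that coalesces past BTRFS_ASYNC_DISCARD_MAX_FILTER
 * promotes its block group from index 2 to index 1 via
 * btrfs_discard_check_filter(), while regions that stay below
 * BTRFS_ASYNC_DISCARD_MIN_FILTER are skipped by async discard entirely.
 */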

static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  struct btrfs_block_group *block_group)
{
	return &discard_ctl->discard_list[block_group->discard_index];
}

/*
 * Determine if async discard should be running.
 *
 * @discard_ctl: discard control
 *
 * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 */
static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	lockdep_assert_held(&discard_ctl->lock);
	if (!btrfs_run_discard_work(discard_ctl))
		return;

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}
	if (list_empty(&block_group->discard_list))
		btrfs_get_block_group(block_group);

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}

static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				struct btrfs_block_group *block_group)
{
	if (!btrfs_is_block_group_data_only(block_group))
		return;

	spin_lock(&discard_ctl->lock);
	__add_to_discard_list(discard_ctl, block_group);
	spin_unlock(&discard_ctl->lock);
}

static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	bool queued;

	spin_lock(&discard_ctl->lock);

	queued = !list_empty(&block_group->discard_list);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	if (!queued)
		btrfs_get_block_group(block_group);
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}

static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;
	bool queued = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	queued = !list_empty(&block_group->discard_list);
	list_del_init(&block_group->discard_list);
	/*
	 * If the block group is currently running in the discard workfn, we
	 * don't want to deref it, since it's still being used by the workfn.
	 * The workfn will notice this case and deref the block group when it
	 * is finished.
	 */
	if (queued && !running)
		btrfs_put_block_group(block_group);

	spin_unlock(&discard_ctl->lock);

	return running;
}
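
/*
 * Reference counting summary for the helpers above (descriptive, not new
 * behavior): a block group holds one extra reference while it sits on any
 * discard list.  remove_from_discard_list() drops that reference only when
 * the group is queued but not currently being processed; when it is the
 * running block group, discard_ctl->block_group is cleared instead and
 * btrfs_discard_workfn() drops the reference once it finishes.
 */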

/*
 * Find the block_group that's up next for discarding.
 *
 * @discard_ctl:  discard control
 * @now:          current time
 *
 * Iterate over the discard lists to find the next block_group up for
 * discarding, checking the discard_eligible_time of each block_group.
 */
static struct btrfs_block_group *find_next_block_group(
					struct btrfs_discard_ctl *discard_ctl,
					u64 now)
{
	struct btrfs_block_group *ret_block_group = NULL, *block_group;
	int i;

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		struct list_head *discard_list = &discard_ctl->discard_list[i];

		if (!list_empty(discard_list)) {
			block_group = list_first_entry(discard_list,
						       struct btrfs_block_group,
						       discard_list);

			if (!ret_block_group)
				ret_block_group = block_group;

			if (ret_block_group->discard_eligible_time < now)
				break;

			if (ret_block_group->discard_eligible_time >
			    block_group->discard_eligible_time)
				ret_block_group = block_group;
		}
	}

	return ret_block_group;
}
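
/*
 * Worked example (hypothetical eligibility times): if list 0 is headed by
 * a block group eligible at t=100 and list 2 by one eligible at t=50, a
 * call with now=60 returns the list 2 entry, since neither is eligible yet
 * and the smallest discard_eligible_time wins.  With now=110 the list 0
 * entry is returned instead: an already eligible head on a lower index
 * list short-circuits the scan.
 */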

/*
 * Look up the next block group and set it for use.
 *
 * @discard_ctl:   discard control
 * @discard_state: the discard_state of the block_group after state management
 * @discard_index: the discard_index of the block_group after state management
 * @now:           time when discard was invoked, in ns
 *
 * Wrap find_next_block_group() and set the block_group to be in use.
 * @discard_state's control flow is managed here.  Variables related to
 * @discard_state are reset here as needed (e.g. @discard_cursor).  @discard_state
 * and @discard_index are remembered as they may change while we're discarding,
 * but we want the discard to execute in the context determined here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now >= block_group->discard_eligible_time) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group)) {
				__add_to_discard_list(discard_ctl, block_group);
			} else {
				list_del_init(&block_group->discard_list);
				btrfs_put_block_group(block_group);
			}
			goto again;
		}
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
		discard_ctl->block_group = block_group;
	}
	if (block_group) {
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);

	return block_group;
}

/*
 * Update a block group's filters.
 *
 * @block_group:  block group of interest
 * @bytes:        recently freed region size after coalescing
 *
 * Async discard maintains multiple lists with progressively smaller filters
 * to prioritize discarding based on size.  Should free space that matches
 * a larger filter be returned to the free_space_cache, prioritize that discard
 * by moving @block_group to the proper filter.
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}
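
/*
 * Worked example (sizes hypothetical): a block group on index 2, whose
 * trims use BTRFS_ASYNC_DISCARD_MIN_FILTER, frees a region that coalesces
 * to at least discard_minlen[1] == BTRFS_ASYNC_DISCARD_MAX_FILTER.  The
 * check above fires, the group is pulled off its current list, and the
 * scan re-files it at index 1 so the larger region is discarded sooner.
 */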

/*
 * Move a block group along the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Increment @block_group's discard_index.  If it falls off the list, let it
 * be.  Otherwise add it back to the appropriate list.
 */
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	block_group->discard_index++;
	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
		block_group->discard_index = 1;
		return;
	}

	add_to_discard_list(discard_ctl, block_group);
}

/*
 * Remove a block_group from the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Remove @block_group from the discard lists.  If necessary, wait on the
 * current work and then reschedule the delayed work.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}

/*
 * Handles queuing the block_groups.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Maintain the LRU order of the discard lists.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}

static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;

	if (!btrfs_run_discard_work(discard_ctl))
		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so we can manage the bytes rate limit by keeping
		 * track of the previous discard.
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);

			delay = max(delay, bps_delay);
		}

		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, bg_timeout);
		}

		if (override && discard_ctl->prev_discard) {
			u64 elapsed = now - discard_ctl->prev_discard_time;

			if (delay > elapsed)
				delay -= elapsed;
			else
				delay = 0;
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
}
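
/*
 * Worked example of the byte rate limit above (numbers hypothetical):
 * with kbps_limit = 100 * 1024 (i.e. 100 MiB/s) and a prev_discard of
 * 64 MiB, bps_delay = 64M * NSEC_PER_SEC / 100M, i.e. 640ms worth of
 * nanoseconds, which then overrides a smaller iops-derived delay via the
 * max() above.
 */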

/*
 * Responsible for scheduling the discard work.
 *
 * @discard_ctl:  discard control
 * @override:     override the current timer
 *
 * Discards are issued by a delayed workqueue item.  @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit.  This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine the next step of a block_group.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Determine the next step for a block group after it's finished going through
 * a pass on a discard list.  If it is unused and fully trimmed, we can mark it
 * unused and send it to the unused_bgs path.  Otherwise, pass it onto the
 * appropriate filter list or let it fall off.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}

/*
 * Discard work queue callback
 *
 * @work: work
 *
 * Find the next block_group to start discarding and then discard a single
 * region.  It does this in a two-pass fashion: first extents and second
 * bitmaps.  Completely discarded block groups are sent to the unused_bgs path.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;
	if (now < block_group->discard_eligible_time) {
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the max
		 * length filter.  In the case something is added to make a
		 * region go beyond the max filter, the entire bitmap is set
		 * back to BTRFS_TRIM_STATE_UNTRIMMED.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
				       block_group->discard_cursor,
				       btrfs_block_group_end(block_group),
				       minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
				       block_group->discard_cursor,
				       btrfs_block_group_end(block_group),
				       minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	/*
	 * If the block group was removed from the discard list while it was
	 * running in this workfn, then we didn't deref it, since this function
	 * still owned that reference. But we set the discard_ctl->block_group
	 * back to NULL, so we can use that condition to know that now we need
	 * to deref the block_group.
	 */
	if (discard_ctl->block_group == NULL)
		btrfs_put_block_group(block_group);
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}
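
/*
 * The state progression a block group takes through the workfn above, as a
 * sketch:
 *
 *	BTRFS_DISCARD_RESET_CURSOR    (cursor moved back to block group start)
 *	  -> BTRFS_DISCARD_EXTENTS    (first pass: trim free extents)
 *	  -> BTRFS_DISCARD_BITMAPS    (second pass: trim bitmap regions)
 *	  -> btrfs_finish_discard_pass()  (re-file, or mark unused if empty)
 *
 * If the block group is requeued in the middle (e.g. it becomes unused),
 * RESET_CURSOR is set again and both passes restart from the beginning.
 */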

/*
 * Recalculate the base delay.
 *
 * @discard_ctl: discard control
 *
 * Recalculate the base delay which is based off the total number of
 * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC;
	unsigned long delay;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The following is to fix a potential -1 discrepancy that we're not
	 * sure how to reproduce. But given that this is the only place that
	 * utilizes these numbers and this is only called from
	 * btrfs_finish_extent_commit() which is synchronized, we can correct
	 * here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);

	if (iops_limit) {
		delay = MSEC_PER_SEC / iops_limit;
	} else {
		/*
		 * Unset iops_limit means go as fast as possible, so allow a
		 * delay of 0.
		 */
		delay = 0;
		min_delay = 0;
	}

	delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay_ms = delay;

	spin_unlock(&discard_ctl->lock);
}
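
/*
 * Example of the clamp above (limits hypothetical): iops_limit = 10 gives
 * delay = MSEC_PER_SEC / 10 = 100ms; the default BTRFS_DISCARD_MAX_IOPS of
 * 1000 gives 1ms, the minimum; and iops_limit = 0 permits a 0ms delay,
 * i.e. discard as fast as possible.
 */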

/*
 * Propagate discard counters.
 *
 * @block_group: block_group of interest
 *
 * Propagate deltas of counters up to the discard_ctl.  It maintains a current
 * counter and a previous counter passing the delta up to the global stat.
 * Then the current counter value becomes the previous counter value.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
	struct btrfs_free_space_ctl *ctl;
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	ctl = block_group->free_space_ctl;
	discard_ctl = &block_group->fs_info->discard_ctl;

	lockdep_assert_held(&ctl->tree_lock);
	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}
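
/*
 * Example of the delta propagation above (counts hypothetical): if a block
 * group's discardable_extents counter moves to CURR = 7 with PREV = 5, the
 * global discard_ctl->discardable_extents is bumped by +2 and PREV becomes
 * 7, so a later call only propagates changes made after this one.
 */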

/*
 * Punt unused_bgs list to discard lists.
 *
 * @fs_info: fs_info of interest
 *
 * The unused_bgs list needs to be punted to the discard lists because the
 * order of operations is changed.  In the normal synchronous discard path, the
 * block groups are trimmed via a single large trim in transaction commit.  This
 * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
 * it must be done before going down the unused_bgs path.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);
	/* We enabled async discard, so punt all to the queue */
	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/*
		 * This put is for the get done by btrfs_mark_bg_unused.
		 * Queueing discard incremented it for discard's reference.
		 */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/*
 * Purge discard lists.
 *
 * @discard_ctl: discard control
 *
 * If we are disabling async discard, we may have intercepted block groups that
 * are completely free and ready for the unused_bgs path.  As discarding will
 * now happen in transaction commit or not at all, we can safely mark the
 * corresponding block groups as unused and they will be sent on their merry
 * way to the unused_bgs list.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
			btrfs_put_block_group(block_group);
		}
	}
	spin_unlock(&discard_ctl->lock);
}

void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	discard_ctl->prev_discard_time = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}

void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}