// SPDX-License-Identifier: GPL-2.0

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "block-group.h"
#include "discard.h"
#include "free-space-cache.h"
#include "fs.h"

/*
 * This contains the logic to handle async discard.
 *
 * Async discard manages trimming of free space outside of transaction commit.
 * Discarding is done by managing the block_groups on a LRU list based on free
 * space recency. Two passes are used to first prioritize discarding extents
 * and then give trimming in the bitmaps the best opportunity to coalesce.
 * The block_groups are maintained on multiple lists to allow for multiple
 * passes with different discard filter requirements. A delayed work item is
 * used to manage discarding with a timeout determined by the max of the delay
 * incurred by the iops rate limit, the byte rate limit, and the max delay of
 * BTRFS_DISCARD_MAX_DELAY.
 *
 * Note, this only keeps track of block_groups that are explicitly for data.
 * Mixed block_groups are not supported.
 *
 * The first list is special to manage discarding of fully free block groups.
 * This is necessary because we issue a final trim for a full free block group
 * after forgetting it. When a block group becomes unused, instead of directly
 * being added to the unused_bgs list, we add it to this first list. Then
 * from there, if it becomes fully discarded, we place it onto the unused_bgs
 * list.
 *
 * The in-memory free space cache serves as the backing state for discard.
 * Consequently this means there is no persistence. We opt to load all the
 * block groups in as not discarded, so the mount case degenerates to the
 * crashing case.
 *
 * As the free space cache uses bitmaps, there exists a tradeoff between
 * ease/efficiency for find_free_extent() and the accuracy of discard state.
 * Here we opt to let untrimmed regions merge with everything while only letting
 * trimmed regions merge with other trimmed regions. This can cause
 * overtrimming, but the coalescing benefit seems to be worth it. Additionally,
 * bitmap state is tracked as a whole. If we're able to fully trim a bitmap,
 * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in,
 * this resets the state and we will retry trimming the whole bitmap. This is a
 * tradeoff between discard state accuracy and the cost of accounting.
 */

/* This is an initial delay to give some chance for block reuse */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(1000U)

/* Monotonically decreasing minimum length filters after index 0 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};

static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  struct btrfs_block_group *block_group)
{
	return &discard_ctl->discard_list[block_group->discard_index];
}

/*
 * Determine if async discard should be running.
 *
 * @discard_ctl: discard control
 *
 * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 */
static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	lockdep_assert_held(&discard_ctl->lock);
	if (!btrfs_run_discard_work(discard_ctl))
		return;

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}
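	/*
	 * The discard list itself holds one reference on the block group;
	 * only take a new reference when the block group is first queued.
	 */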
	if (list_empty(&block_group->discard_list))
		btrfs_get_block_group(block_group);

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}

static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				struct btrfs_block_group *block_group)
{
	if (!btrfs_is_block_group_data_only(block_group))
		return;

	spin_lock(&discard_ctl->lock);
	__add_to_discard_list(discard_ctl, block_group);
	spin_unlock(&discard_ctl->lock);
}

static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	bool queued;

	spin_lock(&discard_ctl->lock);

	queued = !list_empty(&block_group->discard_list);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

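	/* Remove the block group from whichever filter list it is on now. */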
	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	if (!queued)
		btrfs_get_block_group(block_group);
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}

static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;
	bool queued = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	queued = !list_empty(&block_group->discard_list);
	list_del_init(&block_group->discard_list);
	/*
	 * If the block group is currently running in the discard workfn, we
	 * don't want to deref it, since it's still being used by the workfn.
	 * The workfn will notice this case and deref the block group when it
	 * is finished.
	 */
	if (queued && !running)
		btrfs_put_block_group(block_group);

	spin_unlock(&discard_ctl->lock);

	return running;
}

/*
 * Find the block_group that's up next for discarding.
 *
 * @discard_ctl: discard control
 * @now: current time
 *
 * Iterate over the discard lists to find the next block_group up for
 * discarding, checking the discard_eligible_time of each block_group.
 */
static struct btrfs_block_group *find_next_block_group(
					struct btrfs_discard_ctl *discard_ctl,
					u64 now)
{
	struct btrfs_block_group *ret_block_group = NULL, *block_group;
	int i;

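	/*
	 * Look at the head of each list and keep the block group with the
	 * earliest discard_eligible_time, stopping early once the current
	 * candidate is already eligible.
	 */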
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		struct list_head *discard_list = &discard_ctl->discard_list[i];

		if (!list_empty(discard_list)) {
			block_group = list_first_entry(discard_list,
						       struct btrfs_block_group,
						       discard_list);

			if (!ret_block_group)
				ret_block_group = block_group;

			if (ret_block_group->discard_eligible_time < now)
				break;

			if (ret_block_group->discard_eligible_time >
			    block_group->discard_eligible_time)
				ret_block_group = block_group;
		}
	}

	return ret_block_group;
}

/*
 * Look up the next block group and set it for use.
 *
 * @discard_ctl: discard control
 * @discard_state: the discard_state of the block_group after state management
 * @discard_index: the discard_index of the block_group after state management
 * @now: time when discard was invoked, in ns
 *
 * Wrap find_next_block_group() and set the block_group to be in use.
 * @discard_state's control flow is managed here. Variables related to
 * @discard_state are reset here as needed (e.g. @discard_cursor). @discard_state
 * and @discard_index are remembered as they may change while we're discarding,
 * but we want the discard to execute in the context determined here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now >= block_group->discard_eligible_time) {
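		/*
		 * A block group on the unused list that has been allocated
		 * into again must be requeued on the regular filter lists if
		 * it is still data only, or dropped otherwise.
		 */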
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group)) {
				__add_to_discard_list(discard_ctl, block_group);
			} else {
				list_del_init(&block_group->discard_list);
				btrfs_put_block_group(block_group);
			}
			goto again;
		}
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
		discard_ctl->block_group = block_group;
	}
	if (block_group) {
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);

	return block_group;
}

/*
 * Update a block group's filters.
 *
 * @block_group: block group of interest
 * @bytes: recently freed region size after coalescing
 *
 * Async discard maintains multiple lists with progressively smaller filters
 * to prioritize discarding based on size. Should a free space region that
 * matches a larger filter be returned to the free_space_cache, prioritize
 * that discard by moving @block_group to the proper filter list.
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}

/*
 * Move a block group along the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Increment @block_group's discard_index. If it falls off the list, let it
 * be. Otherwise add it back to the appropriate list.
 */
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	block_group->discard_index++;
	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
		block_group->discard_index = 1;
		return;
	}

	add_to_discard_list(discard_ctl, block_group);
}

/*
 * Remove a block_group from the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Remove @block_group from the discard lists. If necessary, wait on the
 * current work and then reschedule the delayed work.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}

/*
 * Handles queuing the block_groups.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Maintain the LRU order of the discard lists.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

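	/* Fully empty block groups take the shorter unused-list path. */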
	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}

static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;

	if (!btrfs_run_discard_work(discard_ctl))
		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so we can manage the bytes rate limit by keeping
		 * track of the previous discard.
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);

			delay = max(delay, bps_delay);
		}

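		/*
		 * For example, having just discarded 256MiB with kbps_limit
		 * set to 102400 (100MiB/s), bps_delay comes to roughly 2.56s.
		 */
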
		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, bg_timeout);
		}

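		/*
		 * When overriding an already scheduled timer, credit the time
		 * that has elapsed since the previous discard against the new
		 * delay.
		 */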
		if (override && discard_ctl->prev_discard) {
			u64 elapsed = now - discard_ctl->prev_discard_time;

			if (delay > elapsed)
				delay -= elapsed;
			else
				delay = 0;
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
}

/*
 * Responsible for scheduling the discard work.
 *
 * @discard_ctl: discard control
 * @override: override the current timer
 *
 * Discards are issued by a delayed workqueue item. @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit. This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine next step of a block_group.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Determine the next step for a block group after it's finished going through
 * a pass on a discard list. If it is unused and fully trimmed, we can mark it
 * unused and send it to the unused_bgs path. Otherwise, pass it onto the
 * appropriate filter list or let it fall off.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}

/*
 * Discard work queue callback
 *
 * @work: work
 *
 * Find the next block_group to start discarding and then discard a single
 * region. It does this in a two-pass fashion: first extents and second
 * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;
	if (now < block_group->discard_eligible_time) {
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the max
		 * length filter. In the case something is added to make a
		 * region go beyond the max filter, the entire bitmap is set
		 * back to BTRFS_TRIM_STATE_UNTRIMMED.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
				block_group->discard_cursor,
				btrfs_block_group_end(block_group),
				minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
				block_group->discard_cursor,
				btrfs_block_group_end(block_group),
				minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			block_group->discard_cursor = block_group->start;
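			/*
			 * The extents pass is done: rewind the cursor and move
			 * on to the bitmaps pass, unless an allocation has
			 * already reset the state under us.
			 */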
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	/*
	 * If the block group was removed from the discard list while it was
	 * running in this workfn, then we didn't deref it, since this function
	 * still owned that reference. But we set the discard_ctl->block_group
	 * back to NULL, so we can use that condition to know that now we need
	 * to deref the block_group.
	 */
	if (discard_ctl->block_group == NULL)
		btrfs_put_block_group(block_group);
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Recalculate the base delay.
 *
 * @discard_ctl: discard control
 *
 * Recalculate the base delay which is based off the total number of
 * discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms)
 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC;
	unsigned long delay;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The following is to fix a potential -1 discrepancy that we're not
	 * sure how to reproduce. But given that this is the only place that
	 * utilizes these numbers and this is only called from
	 * btrfs_finish_extent_commit() which is synchronized, we can correct
	 * here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);

	if (iops_limit) {
		delay = MSEC_PER_SEC / iops_limit;
	} else {
		/*
		 * Unset iops_limit means go as fast as possible, so allow a
		 * delay of 0.
		 */
		delay = 0;
		min_delay = 0;
	}

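	/*
	 * E.g. the default iops_limit of 1000 yields a 1ms delay; the clamp
	 * below keeps the result within [min_delay, 1000ms].
	 */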
	delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay_ms = delay;

	spin_unlock(&discard_ctl->lock);
}

/*
 * Propagate discard counters.
 *
 * @block_group: block_group of interest
 *
 * Propagate deltas of counters up to the discard_ctl. It maintains a current
 * counter and a previous counter passing the delta up to the global stat.
 * Then the current counter value becomes the previous counter value.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
	struct btrfs_free_space_ctl *ctl;
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	ctl = block_group->free_space_ctl;
	discard_ctl = &block_group->fs_info->discard_ctl;

	lockdep_assert_held(&ctl->tree_lock);
	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}

/*
 * Punt unused_bgs list to discard lists.
 *
 * @fs_info: fs_info of interest
 *
 * The unused_bgs list needs to be punted to the discard lists because the
 * order of operations is changed. In the normal synchronous discard path, the
 * block groups are trimmed via a single large trim in transaction commit. This
 * is ultimately what we are trying to avoid with asynchronous discard. Thus,
 * it must be done before going down the unused_bgs path.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);
	/* We enabled async discard, so punt all to the queue */
	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/*
		 * This put is for the get done by btrfs_mark_bg_unused.
		 * Queueing discard incremented it for discard's reference.
		 */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/*
 * Purge discard lists.
 *
 * @discard_ctl: discard control
 *
 * If we are disabling async discard, we may have intercepted block groups that
 * are completely free and ready for the unused_bgs path. As discarding will
 * now happen in transaction commit or not at all, we can safely mark the
 * corresponding block groups as unused and they will be sent on their merry
 * way to the unused_bgs list.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
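			/*
			 * The lock is dropped while marking the block group
			 * unused, which takes fs_info->unused_bgs_lock; it is
			 * reacquired before dropping our list reference.
			 */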
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
			btrfs_put_block_group(block_group);
		}
	}
	spin_unlock(&discard_ctl->lock);
}

void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	discard_ctl->prev_discard_time = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}

void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}