// SPDX-License-Identifier: GPL-2.0

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "block-group.h"
#include "discard.h"
#include "free-space-cache.h"

/*
 * This contains the logic to handle async discard.
 *
 * Async discard manages trimming of free space outside of transaction commit.
 * Discarding is done by managing the block_groups on an LRU list based on free
 * space recency.  Two passes are used: the first prioritizes discarding
 * extents, and the second trims bitmaps, giving coalescing the best
 * opportunity.  The block_groups are maintained on multiple lists to allow for
 * multiple passes with different discard filter requirements.  A delayed work
 * item is used to manage discarding with a timeout determined by the max of
 * the delay incurred by the iops rate limit, the byte rate limit, and
 * BTRFS_DISCARD_MAX_DELAY_MSEC.
 *
 * Note, this only keeps track of block_groups that are explicitly for data.
 * Mixed block_groups are not supported.
 *
 * The first list is special to manage discarding of fully free block groups.
 * This is necessary because we issue a final trim for a fully free block group
 * after forgetting it.  When a block group becomes unused, instead of directly
 * being added to the unused_bgs list, we add it to this first list.  Then
 * from there, if it becomes fully discarded, we place it onto the unused_bgs
 * list.
 *
 * The in-memory free space cache serves as the backing state for discard.
 * Consequently this means there is no persistence.  We opt to load all the
 * block groups in as not discarded, so the mount case degenerates to the
 * crashing case.
 *
 * As the free space cache uses bitmaps, there exists a tradeoff between
 * ease/efficiency for find_free_extent() and the accuracy of discard state.
 * Here we opt to let untrimmed regions merge with everything while only
 * letting trimmed regions merge with other trimmed regions.  This can cause
 * overtrimming, but the coalescing benefit seems to be worth it.  Additionally,
 * bitmap state is tracked as a whole.  If we're able to fully trim a bitmap,
 * the trimmed flag is set on the bitmap.  Otherwise, if an allocation comes in,
 * this resets the state and we will retry trimming the whole bitmap.  This is a
 * tradeoff between discard state accuracy and the cost of accounting.
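 *
 * For example, an untrimmed region freed next to an already trimmed free
 * region coalesces into a single untrimmed region; retrimming it re-discards
 * the previously trimmed space, which is the overtrimming noted above.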
 */

/* This is an initial delay to give some chance for block reuse */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

/* Target completion latency of discarding all discardable extents */
#define BTRFS_DISCARD_TARGET_MSEC	(6 * 60 * 60UL * MSEC_PER_SEC)
#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(10U)

/* Monotonically decreasing minimum length filters after index 0 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};

static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  struct btrfs_block_group *block_group)
{
	return &discard_ctl->discard_list[block_group->discard_index];
}

static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	if (!btrfs_run_discard_work(discard_ctl))
		return;

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}

static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				struct btrfs_block_group *block_group)
{
	if (!btrfs_is_block_group_data_only(block_group))
		return;

	spin_lock(&discard_ctl->lock);
	__add_to_discard_list(discard_ctl, block_group);
	spin_unlock(&discard_ctl->lock);
}

static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	spin_lock(&discard_ctl->lock);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}

static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	list_del_init(&block_group->discard_list);

	spin_unlock(&discard_ctl->lock);

	return running;
}

/**
 * find_next_block_group - find block_group that's up next for discarding
 * @discard_ctl: discard control
 * @now: current time
 *
 * Iterate over the discard lists to find the next block_group up for
 * discarding, checking the discard_eligible_time of each block_group.
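 *
 * If multiple list heads are already eligible, the one on the lowest-index
 * list is preferred; otherwise the block_group with the earliest
 * discard_eligible_time across the lists is returned.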
 */
static struct btrfs_block_group *find_next_block_group(
					struct btrfs_discard_ctl *discard_ctl,
					u64 now)
{
	struct btrfs_block_group *ret_block_group = NULL, *block_group;
	int i;

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		struct list_head *discard_list = &discard_ctl->discard_list[i];

		if (!list_empty(discard_list)) {
			block_group = list_first_entry(discard_list,
						       struct btrfs_block_group,
						       discard_list);

			if (!ret_block_group)
				ret_block_group = block_group;

			if (ret_block_group->discard_eligible_time < now)
				break;

			if (ret_block_group->discard_eligible_time >
			    block_group->discard_eligible_time)
				ret_block_group = block_group;
		}
	}

	return ret_block_group;
}

/**
 * peek_discard_list - wrap find_next_block_group()
 * @discard_ctl: discard control
 * @discard_state: the discard_state of the block_group after state management
 * @discard_index: the discard_index of the block_group after state management
 * @now: current time in ns
 *
 * This wraps find_next_block_group() and sets the block_group to be in use.
 * discard_state's control flow is managed here.  Variables related to
 * discard_state are reset here as needed (e.g. discard_cursor).  @discard_state
 * and @discard_index are remembered as they may change while we're discarding,
 * but we want the discard to execute in the context determined here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now >= block_group->discard_eligible_time) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group))
				__add_to_discard_list(discard_ctl, block_group);
			else
				list_del_init(&block_group->discard_list);
			goto again;
		}
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
		discard_ctl->block_group = block_group;
	}
	if (block_group) {
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);

	return block_group;
}

/**
 * btrfs_discard_check_filter - updates a block group's filters
 * @block_group: block group of interest
 * @bytes: recently freed region size after coalescing
 *
 * Async discard maintains multiple lists with progressively smaller filters
 * to prioritize discarding based on size.  Should a free space that matches
 * a larger filter be returned to the free_space_cache, prioritize that discard
 * by moving @block_group to the proper filter.
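 *
 * For example, a block group on the BTRFS_ASYNC_DISCARD_MIN_FILTER list that
 * coalesces a free region of at least BTRFS_ASYNC_DISCARD_MAX_FILTER bytes is
 * moved back up to the BTRFS_DISCARD_INDEX_START list.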
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}

/**
 * btrfs_update_discard_index - moves a block group along the discard lists
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Increment @block_group's discard_index.  If it falls off the list, let it
 * be.  Otherwise add it back to the appropriate list.
 */
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	block_group->discard_index++;
	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
		block_group->discard_index = 1;
		return;
	}

	add_to_discard_list(discard_ctl, block_group);
}

/**
 * btrfs_discard_cancel_work - remove a block_group from the discard lists
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * This removes @block_group from the discard lists.  If necessary, it waits on
 * the current work and then reschedules the delayed work.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}

/**
 * btrfs_discard_queue_work - handles queuing the block_groups
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * This maintains the LRU order of the discard lists.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}

static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;

	if (!btrfs_run_discard_work(discard_ctl))
		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so we can manage the bytes rate limit by keeping
		 * track of the previous discard.
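		 *
		 * For example, with kbps_limit = 100 and prev_discard = SZ_1M,
		 * bps_limit is 102400 bytes/sec and bps_delay works out to
		 * 1048576 * NSEC_PER_SEC / 102400, i.e. roughly 10 seconds
		 * before the next discard is allowed to be issued.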
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);

			delay = max(delay, bps_delay);
		}

		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, bg_timeout);
		}

		if (override && discard_ctl->prev_discard) {
			u64 elapsed = now - discard_ctl->prev_discard_time;

			if (delay > elapsed)
				delay -= elapsed;
			else
				delay = 0;
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
}

/**
 * btrfs_discard_schedule_work - responsible for scheduling the discard work
 * @discard_ctl: discard control
 * @override: override the current timer
 *
 * Discards are issued by a delayed workqueue item.  @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit.  This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

/**
 * btrfs_finish_discard_pass - determine next step of a block_group
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * This determines the next step for a block group after it's finished going
 * through a pass on a discard list.  If it is unused and fully trimmed, we can
 * mark it unused and send it to the unused_bgs path.  Otherwise, pass it onto
 * the appropriate filter list or let it fall off.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}

/**
 * btrfs_discard_workfn - discard work function
 * @work: work
 *
 * This finds the next block_group to start discarding and then discards a
 * single region.  It does this in a two-pass fashion: first extents and second
 * bitmaps.  Completely discarded block groups are sent to the unused_bgs path.
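 *
 * Each pass walks block_group->discard_cursor forward; once the extent pass
 * reaches the end of the block group, the cursor is reset and discard_state
 * advances to BTRFS_DISCARD_BITMAPS so the next invocation trims bitmaps.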
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;
	if (now < block_group->discard_eligible_time) {
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the max
		 * length filter.  In the case something is added to make a
		 * region go beyond the max filter, the entire bitmap is set
		 * back to BTRFS_TRIM_STATE_UNTRIMMED.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
				block_group->discard_cursor,
				btrfs_block_group_end(block_group),
				minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
				block_group->discard_cursor,
				btrfs_block_group_end(block_group),
				minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}

/**
 * btrfs_run_discard_work - determines if async discard should be running
 * @discard_ctl: discard control
 *
 * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 */
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

/**
 * btrfs_discard_calc_delay - recalculate the base delay
 * @discard_ctl: discard control
 *
 * Recalculate the base delay, which is based on the total number of
 * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
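 *
 * For example, with iops_limit = 10, the delay is MSEC_PER_SEC / 10 = 100ms.
 * Without an iops_limit and with 1000000 discardable extents, the delay is
 * BTRFS_DISCARD_TARGET_MSEC / 1000000 = ~21ms, clamped to [1ms, 1000ms].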
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long delay;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The following is to fix a potential -1 discrepancy that we're not
	 * sure how to reproduce.  But given that this is the only place that
	 * utilizes these numbers and this is only called from
	 * btrfs_finish_extent_commit() which is synchronized, we can correct
	 * here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);
	if (iops_limit)
		delay = MSEC_PER_SEC / iops_limit;
	else
		delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;

	delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
		      BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay_ms = delay;

	spin_unlock(&discard_ctl->lock);
}

/**
 * btrfs_discard_update_discardable - propagate discard counters
 * @block_group: block_group of interest
 *
 * This propagates deltas of counters up to the discard_ctl.  It maintains a
 * current counter and a previous counter, passing the delta up to the global
 * stat.  Then the current counter value becomes the previous counter value.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
	struct btrfs_free_space_ctl *ctl;
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	ctl = block_group->free_space_ctl;
	discard_ctl = &block_group->fs_info->discard_ctl;

	lockdep_assert_held(&ctl->tree_lock);
	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}

/**
 * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
 * @fs_info: fs_info of interest
 *
 * The unused_bgs list needs to be punted to the discard lists because the
 * order of operations is changed.  In the normal synchronous discard path, the
 * block groups are trimmed via a single large trim in transaction commit.  This
 * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
 * it must be done before going down the unused_bgs path.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);
	/* We enabled async discard, so punt all to the queue */
	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/* Drop the unused_bgs list reference only after queueing */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/**
 * btrfs_discard_purge_list - purge discard lists
 * @discard_ctl: discard control
 *
 * If we are disabling async discard, we may have intercepted block groups that
 * are completely free and ready for the unused_bgs path.  As discarding will
 * now happen in transaction commit or not at all, we can safely mark the
 * corresponding block groups as unused and they will be sent on their merry
 * way to the unused_bgs list.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
		}
	}
	spin_unlock(&discard_ctl->lock);
}

void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	discard_ctl->prev_discard_time = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}

void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}