1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/jiffies.h> 4 #include <linux/kernel.h> 5 #include <linux/ktime.h> 6 #include <linux/list.h> 7 #include <linux/math64.h> 8 #include <linux/sizes.h> 9 #include <linux/workqueue.h> 10 #include "ctree.h" 11 #include "block-group.h" 12 #include "discard.h" 13 #include "free-space-cache.h" 14 15 /* 16 * This contains the logic to handle async discard. 17 * 18 * Async discard manages trimming of free space outside of transaction commit. 19 * Discarding is done by managing the block_groups on a LRU list based on free 20 * space recency. Two passes are used to first prioritize discarding extents 21 * and then allow for trimming in the bitmap the best opportunity to coalesce. 22 * The block_groups are maintained on multiple lists to allow for multiple 23 * passes with different discard filter requirements. A delayed work item is 24 * used to manage discarding with timeout determined by a max of the delay 25 * incurred by the iops rate limit, the byte rate limit, and the max delay of 26 * BTRFS_DISCARD_MAX_DELAY. 27 * 28 * Note, this only keeps track of block_groups that are explicitly for data. 29 * Mixed block_groups are not supported. 30 * 31 * The first list is special to manage discarding of fully free block groups. 32 * This is necessary because we issue a final trim for a full free block group 33 * after forgetting it. When a block group becomes unused, instead of directly 34 * being added to the unused_bgs list, we add it to this first list. Then 35 * from there, if it becomes fully discarded, we place it onto the unused_bgs 36 * list. 37 * 38 * The in-memory free space cache serves as the backing state for discard. 39 * Consequently this means there is no persistence. We opt to load all the 40 * block groups in as not discarded, so the mount case degenerates to the 41 * crashing case. 42 * 43 * As the free space cache uses bitmaps, there exists a tradeoff between 44 * ease/efficiency for find_free_extent() and the accuracy of discard state. 45 * Here we opt to let untrimmed regions merge with everything while only letting 46 * trimmed regions merge with other trimmed regions. This can cause 47 * overtrimming, but the coalescing benefit seems to be worth it. Additionally, 48 * bitmap state is tracked as a whole. If we're able to fully trim a bitmap, 49 * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in, 50 * this resets the state and we will retry trimming the whole bitmap. This is a 51 * tradeoff between discard state accuracy and the cost of accounting. 52 */ 53 54 /* This is an initial delay to give some chance for block reuse */ 55 #define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC) 56 #define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC) 57 58 /* Target completion latency of discarding all discardable extents */ 59 #define BTRFS_DISCARD_TARGET_MSEC (6 * 60 * 60UL * MSEC_PER_SEC) 60 #define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL) 61 #define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL) 62 #define BTRFS_DISCARD_MAX_IOPS (10U) 63 64 /* Montonically decreasing minimum length filters after index 0 */ 65 static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = { 66 0, 67 BTRFS_ASYNC_DISCARD_MAX_FILTER, 68 BTRFS_ASYNC_DISCARD_MIN_FILTER 69 }; 70 71 static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl, 72 struct btrfs_block_group *block_group) 73 { 74 return &discard_ctl->discard_list[block_group->discard_index]; 75 } 76 77 static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, 78 struct btrfs_block_group *block_group) 79 { 80 if (!btrfs_run_discard_work(discard_ctl)) 81 return; 82 83 if (list_empty(&block_group->discard_list) || 84 block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) { 85 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) 86 block_group->discard_index = BTRFS_DISCARD_INDEX_START; 87 block_group->discard_eligible_time = (ktime_get_ns() + 88 BTRFS_DISCARD_DELAY); 89 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR; 90 } 91 92 list_move_tail(&block_group->discard_list, 93 get_discard_list(discard_ctl, block_group)); 94 } 95 96 static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, 97 struct btrfs_block_group *block_group) 98 { 99 if (!btrfs_is_block_group_data_only(block_group)) 100 return; 101 102 spin_lock(&discard_ctl->lock); 103 __add_to_discard_list(discard_ctl, block_group); 104 spin_unlock(&discard_ctl->lock); 105 } 106 107 static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl, 108 struct btrfs_block_group *block_group) 109 { 110 spin_lock(&discard_ctl->lock); 111 112 if (!btrfs_run_discard_work(discard_ctl)) { 113 spin_unlock(&discard_ctl->lock); 114 return; 115 } 116 117 list_del_init(&block_group->discard_list); 118 119 block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED; 120 block_group->discard_eligible_time = (ktime_get_ns() + 121 BTRFS_DISCARD_UNUSED_DELAY); 122 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR; 123 list_add_tail(&block_group->discard_list, 124 &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]); 125 126 spin_unlock(&discard_ctl->lock); 127 } 128 129 static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl, 130 struct btrfs_block_group *block_group) 131 { 132 bool running = false; 133 134 spin_lock(&discard_ctl->lock); 135 136 if (block_group == discard_ctl->block_group) { 137 running = true; 138 discard_ctl->block_group = NULL; 139 } 140 141 block_group->discard_eligible_time = 0; 142 list_del_init(&block_group->discard_list); 143 144 spin_unlock(&discard_ctl->lock); 145 146 return running; 147 } 148 149 /** 150 * find_next_block_group - find block_group that's up next for discarding 151 * @discard_ctl: discard control 152 * @now: current time 153 * 154 * Iterate over the discard lists to find the next block_group up for 155 * discarding checking the discard_eligible_time of block_group. 156 */ 157 static struct btrfs_block_group *find_next_block_group( 158 struct btrfs_discard_ctl *discard_ctl, 159 u64 now) 160 { 161 struct btrfs_block_group *ret_block_group = NULL, *block_group; 162 int i; 163 164 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { 165 struct list_head *discard_list = &discard_ctl->discard_list[i]; 166 167 if (!list_empty(discard_list)) { 168 block_group = list_first_entry(discard_list, 169 struct btrfs_block_group, 170 discard_list); 171 172 if (!ret_block_group) 173 ret_block_group = block_group; 174 175 if (ret_block_group->discard_eligible_time < now) 176 break; 177 178 if (ret_block_group->discard_eligible_time > 179 block_group->discard_eligible_time) 180 ret_block_group = block_group; 181 } 182 } 183 184 return ret_block_group; 185 } 186 187 /** 188 * peek_discard_list - wrap find_next_block_group() 189 * @discard_ctl: discard control 190 * @discard_state: the discard_state of the block_group after state management 191 * @discard_index: the discard_index of the block_group after state management 192 * 193 * This wraps find_next_block_group() and sets the block_group to be in use. 194 * discard_state's control flow is managed here. Variables related to 195 * discard_state are reset here as needed (eg discard_cursor). @discard_state 196 * and @discard_index are remembered as it may change while we're discarding, 197 * but we want the discard to execute in the context determined here. 198 */ 199 static struct btrfs_block_group *peek_discard_list( 200 struct btrfs_discard_ctl *discard_ctl, 201 enum btrfs_discard_state *discard_state, 202 int *discard_index) 203 { 204 struct btrfs_block_group *block_group; 205 const u64 now = ktime_get_ns(); 206 207 spin_lock(&discard_ctl->lock); 208 again: 209 block_group = find_next_block_group(discard_ctl, now); 210 211 if (block_group && now > block_group->discard_eligible_time) { 212 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED && 213 block_group->used != 0) { 214 if (btrfs_is_block_group_data_only(block_group)) 215 __add_to_discard_list(discard_ctl, block_group); 216 else 217 list_del_init(&block_group->discard_list); 218 goto again; 219 } 220 if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) { 221 block_group->discard_cursor = block_group->start; 222 block_group->discard_state = BTRFS_DISCARD_EXTENTS; 223 } 224 discard_ctl->block_group = block_group; 225 *discard_state = block_group->discard_state; 226 *discard_index = block_group->discard_index; 227 } else { 228 block_group = NULL; 229 } 230 231 spin_unlock(&discard_ctl->lock); 232 233 return block_group; 234 } 235 236 /** 237 * btrfs_discard_check_filter - updates a block groups filters 238 * @block_group: block group of interest 239 * @bytes: recently freed region size after coalescing 240 * 241 * Async discard maintains multiple lists with progressively smaller filters 242 * to prioritize discarding based on size. Should a free space that matches 243 * a larger filter be returned to the free_space_cache, prioritize that discard 244 * by moving @block_group to the proper filter. 245 */ 246 void btrfs_discard_check_filter(struct btrfs_block_group *block_group, 247 u64 bytes) 248 { 249 struct btrfs_discard_ctl *discard_ctl; 250 251 if (!block_group || 252 !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 253 return; 254 255 discard_ctl = &block_group->fs_info->discard_ctl; 256 257 if (block_group->discard_index > BTRFS_DISCARD_INDEX_START && 258 bytes >= discard_minlen[block_group->discard_index - 1]) { 259 int i; 260 261 remove_from_discard_list(discard_ctl, block_group); 262 263 for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS; 264 i++) { 265 if (bytes >= discard_minlen[i]) { 266 block_group->discard_index = i; 267 add_to_discard_list(discard_ctl, block_group); 268 break; 269 } 270 } 271 } 272 } 273 274 /** 275 * btrfs_update_discard_index - moves a block group along the discard lists 276 * @discard_ctl: discard control 277 * @block_group: block_group of interest 278 * 279 * Increment @block_group's discard_index. If it falls of the list, let it be. 280 * Otherwise add it back to the appropriate list. 281 */ 282 static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl, 283 struct btrfs_block_group *block_group) 284 { 285 block_group->discard_index++; 286 if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) { 287 block_group->discard_index = 1; 288 return; 289 } 290 291 add_to_discard_list(discard_ctl, block_group); 292 } 293 294 /** 295 * btrfs_discard_cancel_work - remove a block_group from the discard lists 296 * @discard_ctl: discard control 297 * @block_group: block_group of interest 298 * 299 * This removes @block_group from the discard lists. If necessary, it waits on 300 * the current work and then reschedules the delayed work. 301 */ 302 void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl, 303 struct btrfs_block_group *block_group) 304 { 305 if (remove_from_discard_list(discard_ctl, block_group)) { 306 cancel_delayed_work_sync(&discard_ctl->work); 307 btrfs_discard_schedule_work(discard_ctl, true); 308 } 309 } 310 311 /** 312 * btrfs_discard_queue_work - handles queuing the block_groups 313 * @discard_ctl: discard control 314 * @block_group: block_group of interest 315 * 316 * This maintains the LRU order of the discard lists. 317 */ 318 void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, 319 struct btrfs_block_group *block_group) 320 { 321 if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 322 return; 323 324 if (block_group->used == 0) 325 add_to_discard_unused_list(discard_ctl, block_group); 326 else 327 add_to_discard_list(discard_ctl, block_group); 328 329 if (!delayed_work_pending(&discard_ctl->work)) 330 btrfs_discard_schedule_work(discard_ctl, false); 331 } 332 333 /** 334 * btrfs_discard_schedule_work - responsible for scheduling the discard work 335 * @discard_ctl: discard control 336 * @override: override the current timer 337 * 338 * Discards are issued by a delayed workqueue item. @override is used to 339 * update the current delay as the baseline delay interval is reevaluated on 340 * transaction commit. This is also maxed with any other rate limit. 341 */ 342 void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, 343 bool override) 344 { 345 struct btrfs_block_group *block_group; 346 const u64 now = ktime_get_ns(); 347 348 spin_lock(&discard_ctl->lock); 349 350 if (!btrfs_run_discard_work(discard_ctl)) 351 goto out; 352 353 if (!override && delayed_work_pending(&discard_ctl->work)) 354 goto out; 355 356 block_group = find_next_block_group(discard_ctl, now); 357 if (block_group) { 358 u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC; 359 u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit); 360 361 /* 362 * A single delayed workqueue item is responsible for 363 * discarding, so we can manage the bytes rate limit by keeping 364 * track of the previous discard. 365 */ 366 if (kbps_limit && discard_ctl->prev_discard) { 367 u64 bps_limit = ((u64)kbps_limit) * SZ_1K; 368 u64 bps_delay = div64_u64(discard_ctl->prev_discard * 369 NSEC_PER_SEC, bps_limit); 370 371 delay = max(delay, bps_delay); 372 } 373 374 /* 375 * This timeout is to hopefully prevent immediate discarding 376 * in a recently allocated block group. 377 */ 378 if (now < block_group->discard_eligible_time) { 379 u64 bg_timeout = block_group->discard_eligible_time - now; 380 381 delay = max(delay, bg_timeout); 382 } 383 384 if (override && discard_ctl->prev_discard) { 385 u64 elapsed = now - discard_ctl->prev_discard_time; 386 387 if (delay > elapsed) 388 delay -= elapsed; 389 else 390 delay = 0; 391 } 392 393 mod_delayed_work(discard_ctl->discard_workers, 394 &discard_ctl->work, nsecs_to_jiffies(delay)); 395 } 396 out: 397 spin_unlock(&discard_ctl->lock); 398 } 399 400 /** 401 * btrfs_finish_discard_pass - determine next step of a block_group 402 * @discard_ctl: discard control 403 * @block_group: block_group of interest 404 * 405 * This determines the next step for a block group after it's finished going 406 * through a pass on a discard list. If it is unused and fully trimmed, we can 407 * mark it unused and send it to the unused_bgs path. Otherwise, pass it onto 408 * the appropriate filter list or let it fall off. 409 */ 410 static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl, 411 struct btrfs_block_group *block_group) 412 { 413 remove_from_discard_list(discard_ctl, block_group); 414 415 if (block_group->used == 0) { 416 if (btrfs_is_free_space_trimmed(block_group)) 417 btrfs_mark_bg_unused(block_group); 418 else 419 add_to_discard_unused_list(discard_ctl, block_group); 420 } else { 421 btrfs_update_discard_index(discard_ctl, block_group); 422 } 423 } 424 425 /** 426 * btrfs_discard_workfn - discard work function 427 * @work: work 428 * 429 * This finds the next block_group to start discarding and then discards a 430 * single region. It does this in a two-pass fashion: first extents and second 431 * bitmaps. Completely discarded block groups are sent to the unused_bgs path. 432 */ 433 static void btrfs_discard_workfn(struct work_struct *work) 434 { 435 struct btrfs_discard_ctl *discard_ctl; 436 struct btrfs_block_group *block_group; 437 enum btrfs_discard_state discard_state; 438 int discard_index = 0; 439 u64 trimmed = 0; 440 u64 minlen = 0; 441 442 discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work); 443 444 block_group = peek_discard_list(discard_ctl, &discard_state, 445 &discard_index); 446 if (!block_group || !btrfs_run_discard_work(discard_ctl)) 447 return; 448 449 /* Perform discarding */ 450 minlen = discard_minlen[discard_index]; 451 452 if (discard_state == BTRFS_DISCARD_BITMAPS) { 453 u64 maxlen = 0; 454 455 /* 456 * Use the previous levels minimum discard length as the max 457 * length filter. In the case something is added to make a 458 * region go beyond the max filter, the entire bitmap is set 459 * back to BTRFS_TRIM_STATE_UNTRIMMED. 460 */ 461 if (discard_index != BTRFS_DISCARD_INDEX_UNUSED) 462 maxlen = discard_minlen[discard_index - 1]; 463 464 btrfs_trim_block_group_bitmaps(block_group, &trimmed, 465 block_group->discard_cursor, 466 btrfs_block_group_end(block_group), 467 minlen, maxlen, true); 468 discard_ctl->discard_bitmap_bytes += trimmed; 469 } else { 470 btrfs_trim_block_group_extents(block_group, &trimmed, 471 block_group->discard_cursor, 472 btrfs_block_group_end(block_group), 473 minlen, true); 474 discard_ctl->discard_extent_bytes += trimmed; 475 } 476 477 /* 478 * Updated without locks as this is inside the workfn and nothing else 479 * is reading the values 480 */ 481 discard_ctl->prev_discard = trimmed; 482 discard_ctl->prev_discard_time = ktime_get_ns(); 483 484 /* Determine next steps for a block_group */ 485 if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) { 486 if (discard_state == BTRFS_DISCARD_BITMAPS) { 487 btrfs_finish_discard_pass(discard_ctl, block_group); 488 } else { 489 block_group->discard_cursor = block_group->start; 490 spin_lock(&discard_ctl->lock); 491 if (block_group->discard_state != 492 BTRFS_DISCARD_RESET_CURSOR) 493 block_group->discard_state = 494 BTRFS_DISCARD_BITMAPS; 495 spin_unlock(&discard_ctl->lock); 496 } 497 } 498 499 spin_lock(&discard_ctl->lock); 500 discard_ctl->block_group = NULL; 501 spin_unlock(&discard_ctl->lock); 502 503 btrfs_discard_schedule_work(discard_ctl, false); 504 } 505 506 /** 507 * btrfs_run_discard_work - determines if async discard should be running 508 * @discard_ctl: discard control 509 * 510 * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set. 511 */ 512 bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl) 513 { 514 struct btrfs_fs_info *fs_info = container_of(discard_ctl, 515 struct btrfs_fs_info, 516 discard_ctl); 517 518 return (!(fs_info->sb->s_flags & SB_RDONLY) && 519 test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags)); 520 } 521 522 /** 523 * btrfs_discard_calc_delay - recalculate the base delay 524 * @discard_ctl: discard control 525 * 526 * Recalculate the base delay which is based off the total number of 527 * discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms) 528 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC). 529 */ 530 void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl) 531 { 532 s32 discardable_extents; 533 s64 discardable_bytes; 534 u32 iops_limit; 535 unsigned long delay; 536 537 discardable_extents = atomic_read(&discard_ctl->discardable_extents); 538 if (!discardable_extents) 539 return; 540 541 spin_lock(&discard_ctl->lock); 542 543 /* 544 * The following is to fix a potential -1 discrepenancy that we're not 545 * sure how to reproduce. But given that this is the only place that 546 * utilizes these numbers and this is only called by from 547 * btrfs_finish_extent_commit() which is synchronized, we can correct 548 * here. 549 */ 550 if (discardable_extents < 0) 551 atomic_add(-discardable_extents, 552 &discard_ctl->discardable_extents); 553 554 discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes); 555 if (discardable_bytes < 0) 556 atomic64_add(-discardable_bytes, 557 &discard_ctl->discardable_bytes); 558 559 if (discardable_extents <= 0) { 560 spin_unlock(&discard_ctl->lock); 561 return; 562 } 563 564 iops_limit = READ_ONCE(discard_ctl->iops_limit); 565 if (iops_limit) 566 delay = MSEC_PER_SEC / iops_limit; 567 else 568 delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents; 569 570 delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC, 571 BTRFS_DISCARD_MAX_DELAY_MSEC); 572 discard_ctl->delay_ms = delay; 573 574 spin_unlock(&discard_ctl->lock); 575 } 576 577 /** 578 * btrfs_discard_update_discardable - propagate discard counters 579 * @block_group: block_group of interest 580 * 581 * This propagates deltas of counters up to the discard_ctl. It maintains a 582 * current counter and a previous counter passing the delta up to the global 583 * stat. Then the current counter value becomes the previous counter value. 584 */ 585 void btrfs_discard_update_discardable(struct btrfs_block_group *block_group) 586 { 587 struct btrfs_free_space_ctl *ctl; 588 struct btrfs_discard_ctl *discard_ctl; 589 s32 extents_delta; 590 s64 bytes_delta; 591 592 if (!block_group || 593 !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) || 594 !btrfs_is_block_group_data_only(block_group)) 595 return; 596 597 ctl = block_group->free_space_ctl; 598 discard_ctl = &block_group->fs_info->discard_ctl; 599 600 lockdep_assert_held(&ctl->tree_lock); 601 extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] - 602 ctl->discardable_extents[BTRFS_STAT_PREV]; 603 if (extents_delta) { 604 atomic_add(extents_delta, &discard_ctl->discardable_extents); 605 ctl->discardable_extents[BTRFS_STAT_PREV] = 606 ctl->discardable_extents[BTRFS_STAT_CURR]; 607 } 608 609 bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] - 610 ctl->discardable_bytes[BTRFS_STAT_PREV]; 611 if (bytes_delta) { 612 atomic64_add(bytes_delta, &discard_ctl->discardable_bytes); 613 ctl->discardable_bytes[BTRFS_STAT_PREV] = 614 ctl->discardable_bytes[BTRFS_STAT_CURR]; 615 } 616 } 617 618 /** 619 * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists 620 * @fs_info: fs_info of interest 621 * 622 * The unused_bgs list needs to be punted to the discard lists because the 623 * order of operations is changed. In the normal sychronous discard path, the 624 * block groups are trimmed via a single large trim in transaction commit. This 625 * is ultimately what we are trying to avoid with asynchronous discard. Thus, 626 * it must be done before going down the unused_bgs path. 627 */ 628 void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) 629 { 630 struct btrfs_block_group *block_group, *next; 631 632 spin_lock(&fs_info->unused_bgs_lock); 633 /* We enabled async discard, so punt all to the queue */ 634 list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs, 635 bg_list) { 636 list_del_init(&block_group->bg_list); 637 btrfs_put_block_group(block_group); 638 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 639 } 640 spin_unlock(&fs_info->unused_bgs_lock); 641 } 642 643 /** 644 * btrfs_discard_purge_list - purge discard lists 645 * @discard_ctl: discard control 646 * 647 * If we are disabling async discard, we may have intercepted block groups that 648 * are completely free and ready for the unused_bgs path. As discarding will 649 * now happen in transaction commit or not at all, we can safely mark the 650 * corresponding block groups as unused and they will be sent on their merry 651 * way to the unused_bgs list. 652 */ 653 static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl) 654 { 655 struct btrfs_block_group *block_group, *next; 656 int i; 657 658 spin_lock(&discard_ctl->lock); 659 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { 660 list_for_each_entry_safe(block_group, next, 661 &discard_ctl->discard_list[i], 662 discard_list) { 663 list_del_init(&block_group->discard_list); 664 spin_unlock(&discard_ctl->lock); 665 if (block_group->used == 0) 666 btrfs_mark_bg_unused(block_group); 667 spin_lock(&discard_ctl->lock); 668 } 669 } 670 spin_unlock(&discard_ctl->lock); 671 } 672 673 void btrfs_discard_resume(struct btrfs_fs_info *fs_info) 674 { 675 if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) { 676 btrfs_discard_cleanup(fs_info); 677 return; 678 } 679 680 btrfs_discard_punt_unused_bgs_list(fs_info); 681 682 set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags); 683 } 684 685 void btrfs_discard_stop(struct btrfs_fs_info *fs_info) 686 { 687 clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags); 688 } 689 690 void btrfs_discard_init(struct btrfs_fs_info *fs_info) 691 { 692 struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl; 693 int i; 694 695 spin_lock_init(&discard_ctl->lock); 696 INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn); 697 698 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) 699 INIT_LIST_HEAD(&discard_ctl->discard_list[i]); 700 701 discard_ctl->prev_discard = 0; 702 discard_ctl->prev_discard_time = 0; 703 atomic_set(&discard_ctl->discardable_extents, 0); 704 atomic64_set(&discard_ctl->discardable_bytes, 0); 705 discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE; 706 discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC; 707 discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS; 708 discard_ctl->kbps_limit = 0; 709 discard_ctl->discard_extent_bytes = 0; 710 discard_ctl->discard_bitmap_bytes = 0; 711 atomic64_set(&discard_ctl->discard_bytes_saved, 0); 712 } 713 714 void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info) 715 { 716 btrfs_discard_stop(fs_info); 717 cancel_delayed_work_sync(&fs_info->discard_ctl.work); 718 btrfs_discard_purge_list(&fs_info->discard_ctl); 719 } 720