1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/f2fs/gc.c 4 * 5 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 6 * http://www.samsung.com/ 7 */ 8 #include <linux/fs.h> 9 #include <linux/module.h> 10 #include <linux/backing-dev.h> 11 #include <linux/init.h> 12 #include <linux/f2fs_fs.h> 13 #include <linux/kthread.h> 14 #include <linux/delay.h> 15 #include <linux/freezer.h> 16 #include <linux/sched/signal.h> 17 18 #include "f2fs.h" 19 #include "node.h" 20 #include "segment.h" 21 #include "gc.h" 22 #include "iostat.h" 23 #include <trace/events/f2fs.h> 24 25 static struct kmem_cache *victim_entry_slab; 26 27 static unsigned int count_bits(const unsigned long *addr, 28 unsigned int offset, unsigned int len); 29 30 static int gc_thread_func(void *data) 31 { 32 struct f2fs_sb_info *sbi = data; 33 struct f2fs_gc_kthread *gc_th = sbi->gc_thread; 34 wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; 35 wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq; 36 unsigned int wait_ms; 37 38 wait_ms = gc_th->min_sleep_time; 39 40 set_freezable(); 41 do { 42 bool sync_mode, foreground = false; 43 44 wait_event_interruptible_timeout(*wq, 45 kthread_should_stop() || freezing(current) || 46 waitqueue_active(fggc_wq) || 47 gc_th->gc_wake, 48 msecs_to_jiffies(wait_ms)); 49 50 if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq)) 51 foreground = true; 52 53 /* give it a try one time */ 54 if (gc_th->gc_wake) 55 gc_th->gc_wake = 0; 56 57 if (try_to_freeze()) { 58 stat_other_skip_bggc_count(sbi); 59 continue; 60 } 61 if (kthread_should_stop()) 62 break; 63 64 if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { 65 increase_sleep_time(gc_th, &wait_ms); 66 stat_other_skip_bggc_count(sbi); 67 continue; 68 } 69 70 if (time_to_inject(sbi, FAULT_CHECKPOINT)) { 71 f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); 72 f2fs_stop_checkpoint(sbi, false); 73 } 74 75 if (!sb_start_write_trylock(sbi->sb)) { 76 stat_other_skip_bggc_count(sbi); 77 continue; 78 } 79 80 /* 81 * [GC triggering condition] 82 * 0. GC is not conducted currently. 83 * 1. There are enough dirty segments. 84 * 2. IO subsystem is idle by checking the # of writeback pages. 85 * 3. IO subsystem is idle by checking the # of requests in 86 * bdev's request list. 87 * 88 * Note) We have to avoid triggering GCs frequently. 89 * Because it is possible that some segments can be 90 * invalidated soon after by user update or deletion. 91 * So, I'd like to wait some time to collect dirty segments. 
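		 *
		 * As a rough illustration with the default timings from gc.h
		 * (all of them tunable): the loop starts at min_sleep_time
		 * (30s), backs off toward max_sleep_time (60s) while the
		 * device is busy or has few invalid blocks, moves back toward
		 * 30s once invalid blocks pile up, and sleeps for
		 * no_gc_sleep_time (5min) after a round that found no victim.
		 * GC_URGENT_HIGH bypasses this and polls every
		 * urgent_sleep_time (500ms).  The same knobs are exported via
		 * sysfs, e.g.:
		 *   echo 10000 > /sys/fs/f2fs/<dev>/gc_min_sleep_time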
92 */ 93 if (sbi->gc_mode == GC_URGENT_HIGH) { 94 wait_ms = gc_th->urgent_sleep_time; 95 down_write(&sbi->gc_lock); 96 goto do_gc; 97 } 98 99 if (foreground) { 100 down_write(&sbi->gc_lock); 101 goto do_gc; 102 } else if (!down_write_trylock(&sbi->gc_lock)) { 103 stat_other_skip_bggc_count(sbi); 104 goto next; 105 } 106 107 if (!is_idle(sbi, GC_TIME)) { 108 increase_sleep_time(gc_th, &wait_ms); 109 up_write(&sbi->gc_lock); 110 stat_io_skip_bggc_count(sbi); 111 goto next; 112 } 113 114 if (has_enough_invalid_blocks(sbi)) 115 decrease_sleep_time(gc_th, &wait_ms); 116 else 117 increase_sleep_time(gc_th, &wait_ms); 118 do_gc: 119 if (!foreground) 120 stat_inc_bggc_count(sbi->stat_info); 121 122 sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; 123 124 /* foreground GC was been triggered via f2fs_balance_fs() */ 125 if (foreground) 126 sync_mode = false; 127 128 /* if return value is not zero, no victim was selected */ 129 if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO)) 130 wait_ms = gc_th->no_gc_sleep_time; 131 132 if (foreground) 133 wake_up_all(&gc_th->fggc_wq); 134 135 trace_f2fs_background_gc(sbi->sb, wait_ms, 136 prefree_segments(sbi), free_segments(sbi)); 137 138 /* balancing f2fs's metadata periodically */ 139 f2fs_balance_fs_bg(sbi, true); 140 next: 141 sb_end_write(sbi->sb); 142 143 } while (!kthread_should_stop()); 144 return 0; 145 } 146 147 int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) 148 { 149 struct f2fs_gc_kthread *gc_th; 150 dev_t dev = sbi->sb->s_bdev->bd_dev; 151 int err = 0; 152 153 gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 154 if (!gc_th) { 155 err = -ENOMEM; 156 goto out; 157 } 158 159 gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; 160 gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; 161 gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; 162 gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; 163 164 gc_th->gc_wake = 0; 165 166 sbi->gc_thread = gc_th; 167 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); 168 init_waitqueue_head(&sbi->gc_thread->fggc_wq); 169 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, 170 "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); 171 if (IS_ERR(gc_th->f2fs_gc_task)) { 172 err = PTR_ERR(gc_th->f2fs_gc_task); 173 kfree(gc_th); 174 sbi->gc_thread = NULL; 175 } 176 out: 177 return err; 178 } 179 180 void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) 181 { 182 struct f2fs_gc_kthread *gc_th = sbi->gc_thread; 183 184 if (!gc_th) 185 return; 186 kthread_stop(gc_th->f2fs_gc_task); 187 wake_up_all(&gc_th->fggc_wq); 188 kfree(gc_th); 189 sbi->gc_thread = NULL; 190 } 191 192 static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type) 193 { 194 int gc_mode; 195 196 if (gc_type == BG_GC) { 197 if (sbi->am.atgc_enabled) 198 gc_mode = GC_AT; 199 else 200 gc_mode = GC_CB; 201 } else { 202 gc_mode = GC_GREEDY; 203 } 204 205 switch (sbi->gc_mode) { 206 case GC_IDLE_CB: 207 gc_mode = GC_CB; 208 break; 209 case GC_IDLE_GREEDY: 210 case GC_URGENT_HIGH: 211 gc_mode = GC_GREEDY; 212 break; 213 case GC_IDLE_AT: 214 gc_mode = GC_AT; 215 break; 216 } 217 218 return gc_mode; 219 } 220 221 static void select_policy(struct f2fs_sb_info *sbi, int gc_type, 222 int type, struct victim_sel_policy *p) 223 { 224 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 225 226 if (p->alloc_mode == SSR) { 227 p->gc_mode = GC_GREEDY; 228 p->dirty_bitmap = dirty_i->dirty_segmap[type]; 229 p->max_search = dirty_i->nr_dirty[type]; 230 p->ofs_unit = 1; 231 } else if (p->alloc_mode == AT_SSR) 
{
		p->gc_mode = GC_GREEDY;
		p->dirty_bitmap = dirty_i->dirty_segmap[type];
		p->max_search = dirty_i->nr_dirty[type];
		p->ofs_unit = 1;
	} else {
		p->gc_mode = select_gc_type(sbi, gc_type);
		p->ofs_unit = sbi->segs_per_sec;
		if (__is_large_section(sbi)) {
			p->dirty_bitmap = dirty_i->dirty_secmap;
			p->max_search = count_bits(p->dirty_bitmap,
						0, MAIN_SECS(sbi));
		} else {
			p->dirty_bitmap = dirty_i->dirty_segmap[DIRTY];
			p->max_search = dirty_i->nr_dirty[DIRTY];
		}
	}

	/*
	 * Adjust the candidate range: all dirty segments should be selected
	 * for foreground GC and urgent GC.
	 */
	if (gc_type != FG_GC &&
			(sbi->gc_mode != GC_URGENT_HIGH) &&
			(p->gc_mode != GC_AT && p->alloc_mode != AT_SSR) &&
			p->max_search > sbi->max_victim_search)
		p->max_search = sbi->max_victim_search;

	/* select the beginning hot/small space first in no_heap mode */
	if (test_opt(sbi, NOHEAP) &&
			(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
		p->offset = 0;
	else
		p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
}

static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
				struct victim_sel_policy *p)
{
	/* SSR allocates in a segment unit */
	if (p->alloc_mode == SSR)
		return sbi->blocks_per_seg;
	else if (p->alloc_mode == AT_SSR)
		return UINT_MAX;

	/* LFS */
	if (p->gc_mode == GC_GREEDY)
		return 2 * sbi->blocks_per_seg * p->ofs_unit;
	else if (p->gc_mode == GC_CB)
		return UINT_MAX;
	else if (p->gc_mode == GC_AT)
		return UINT_MAX;
	else /* No other gc_mode */
		return 0;
}

static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int secno;

	/*
	 * If the gc_type is FG_GC, we can reuse victim sections that were
	 * already selected by background GC.
	 * Those sections are guaranteed to have few valid blocks.
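	 * (The candidates come from dirty_i->victim_secmap, a per-section
	 * bitmap that background GC populates in get_victim_by_default();
	 * the bit is cleared here once foreground GC takes the section.)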
296 */ 297 for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { 298 if (sec_usage_check(sbi, secno)) 299 continue; 300 clear_bit(secno, dirty_i->victim_secmap); 301 return GET_SEG_FROM_SEC(sbi, secno); 302 } 303 return NULL_SEGNO; 304 } 305 306 static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) 307 { 308 struct sit_info *sit_i = SIT_I(sbi); 309 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); 310 unsigned int start = GET_SEG_FROM_SEC(sbi, secno); 311 unsigned long long mtime = 0; 312 unsigned int vblocks; 313 unsigned char age = 0; 314 unsigned char u; 315 unsigned int i; 316 unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno); 317 318 for (i = 0; i < usable_segs_per_sec; i++) 319 mtime += get_seg_entry(sbi, start + i)->mtime; 320 vblocks = get_valid_blocks(sbi, segno, true); 321 322 mtime = div_u64(mtime, usable_segs_per_sec); 323 vblocks = div_u64(vblocks, usable_segs_per_sec); 324 325 u = (vblocks * 100) >> sbi->log_blocks_per_seg; 326 327 /* Handle if the system time has changed by the user */ 328 if (mtime < sit_i->min_mtime) 329 sit_i->min_mtime = mtime; 330 if (mtime > sit_i->max_mtime) 331 sit_i->max_mtime = mtime; 332 if (sit_i->max_mtime != sit_i->min_mtime) 333 age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime), 334 sit_i->max_mtime - sit_i->min_mtime); 335 336 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); 337 } 338 339 static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, 340 unsigned int segno, struct victim_sel_policy *p) 341 { 342 if (p->alloc_mode == SSR) 343 return get_seg_entry(sbi, segno)->ckpt_valid_blocks; 344 345 /* alloc_mode == LFS */ 346 if (p->gc_mode == GC_GREEDY) 347 return get_valid_blocks(sbi, segno, true); 348 else if (p->gc_mode == GC_CB) 349 return get_cb_cost(sbi, segno); 350 351 f2fs_bug_on(sbi, 1); 352 return 0; 353 } 354 355 static unsigned int count_bits(const unsigned long *addr, 356 unsigned int offset, unsigned int len) 357 { 358 unsigned int end = offset + len, sum = 0; 359 360 while (offset < end) { 361 if (test_bit(offset++, addr)) 362 ++sum; 363 } 364 return sum; 365 } 366 367 static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi, 368 unsigned long long mtime, unsigned int segno, 369 struct rb_node *parent, struct rb_node **p, 370 bool left_most) 371 { 372 struct atgc_management *am = &sbi->am; 373 struct victim_entry *ve; 374 375 ve = f2fs_kmem_cache_alloc(victim_entry_slab, 376 GFP_NOFS, true, NULL); 377 378 ve->mtime = mtime; 379 ve->segno = segno; 380 381 rb_link_node(&ve->rb_node, parent, p); 382 rb_insert_color_cached(&ve->rb_node, &am->root, left_most); 383 384 list_add_tail(&ve->list, &am->victim_list); 385 386 am->victim_count++; 387 388 return ve; 389 } 390 391 static void insert_victim_entry(struct f2fs_sb_info *sbi, 392 unsigned long long mtime, unsigned int segno) 393 { 394 struct atgc_management *am = &sbi->am; 395 struct rb_node **p; 396 struct rb_node *parent = NULL; 397 bool left_most = true; 398 399 p = f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, mtime, &left_most); 400 attach_victim_entry(sbi, mtime, segno, parent, p, left_most); 401 } 402 403 static void add_victim_entry(struct f2fs_sb_info *sbi, 404 struct victim_sel_policy *p, unsigned int segno) 405 { 406 struct sit_info *sit_i = SIT_I(sbi); 407 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); 408 unsigned int start = GET_SEG_FROM_SEC(sbi, secno); 409 unsigned long long mtime = 0; 410 unsigned int i; 411 412 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 
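		/*
		 * With checkpoint disabled, a section can stay dirty while
		 * holding no valid blocks at all; there is nothing for ATGC
		 * to migrate there and the section cannot be freed without a
		 * checkpoint, so skip it.
		 */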
413 if (p->gc_mode == GC_AT && 414 get_valid_blocks(sbi, segno, true) == 0) 415 return; 416 } 417 418 for (i = 0; i < sbi->segs_per_sec; i++) 419 mtime += get_seg_entry(sbi, start + i)->mtime; 420 mtime = div_u64(mtime, sbi->segs_per_sec); 421 422 /* Handle if the system time has changed by the user */ 423 if (mtime < sit_i->min_mtime) 424 sit_i->min_mtime = mtime; 425 if (mtime > sit_i->max_mtime) 426 sit_i->max_mtime = mtime; 427 if (mtime < sit_i->dirty_min_mtime) 428 sit_i->dirty_min_mtime = mtime; 429 if (mtime > sit_i->dirty_max_mtime) 430 sit_i->dirty_max_mtime = mtime; 431 432 /* don't choose young section as candidate */ 433 if (sit_i->dirty_max_mtime - mtime < p->age_threshold) 434 return; 435 436 insert_victim_entry(sbi, mtime, segno); 437 } 438 439 static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi, 440 struct victim_sel_policy *p) 441 { 442 struct atgc_management *am = &sbi->am; 443 struct rb_node *parent = NULL; 444 bool left_most; 445 446 f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, p->age, &left_most); 447 448 return parent; 449 } 450 451 static void atgc_lookup_victim(struct f2fs_sb_info *sbi, 452 struct victim_sel_policy *p) 453 { 454 struct sit_info *sit_i = SIT_I(sbi); 455 struct atgc_management *am = &sbi->am; 456 struct rb_root_cached *root = &am->root; 457 struct rb_node *node; 458 struct rb_entry *re; 459 struct victim_entry *ve; 460 unsigned long long total_time; 461 unsigned long long age, u, accu; 462 unsigned long long max_mtime = sit_i->dirty_max_mtime; 463 unsigned long long min_mtime = sit_i->dirty_min_mtime; 464 unsigned int sec_blocks = BLKS_PER_SEC(sbi); 465 unsigned int vblocks; 466 unsigned int dirty_threshold = max(am->max_candidate_count, 467 am->candidate_ratio * 468 am->victim_count / 100); 469 unsigned int age_weight = am->age_weight; 470 unsigned int cost; 471 unsigned int iter = 0; 472 473 if (max_mtime < min_mtime) 474 return; 475 476 max_mtime += 1; 477 total_time = max_mtime - min_mtime; 478 479 accu = div64_u64(ULLONG_MAX, total_time); 480 accu = min_t(unsigned long long, div_u64(accu, 100), 481 DEFAULT_ACCURACY_CLASS); 482 483 node = rb_first_cached(root); 484 next: 485 re = rb_entry_safe(node, struct rb_entry, rb_node); 486 if (!re) 487 return; 488 489 ve = (struct victim_entry *)re; 490 491 if (ve->mtime >= max_mtime || ve->mtime < min_mtime) 492 goto skip; 493 494 /* age = 10000 * x% * 60 */ 495 age = div64_u64(accu * (max_mtime - ve->mtime), total_time) * 496 age_weight; 497 498 vblocks = get_valid_blocks(sbi, ve->segno, true); 499 f2fs_bug_on(sbi, !vblocks || vblocks == sec_blocks); 500 501 /* u = 10000 * x% * 40 */ 502 u = div64_u64(accu * (sec_blocks - vblocks), sec_blocks) * 503 (100 - age_weight); 504 505 f2fs_bug_on(sbi, age + u >= UINT_MAX); 506 507 cost = UINT_MAX - (age + u); 508 iter++; 509 510 if (cost < p->min_cost || 511 (cost == p->min_cost && age > p->oldest_age)) { 512 p->min_cost = cost; 513 p->oldest_age = age; 514 p->min_segno = ve->segno; 515 } 516 skip: 517 if (iter < dirty_threshold) { 518 node = rb_next(node); 519 goto next; 520 } 521 } 522 523 /* 524 * select candidates around source section in range of 525 * [target - dirty_threshold, target + dirty_threshold] 526 */ 527 static void atssr_lookup_victim(struct f2fs_sb_info *sbi, 528 struct victim_sel_policy *p) 529 { 530 struct sit_info *sit_i = SIT_I(sbi); 531 struct atgc_management *am = &sbi->am; 532 struct rb_node *node; 533 struct rb_entry *re; 534 struct victim_entry *ve; 535 unsigned long long age; 536 unsigned long long max_mtime = 
sit_i->dirty_max_mtime; 537 unsigned long long min_mtime = sit_i->dirty_min_mtime; 538 unsigned int seg_blocks = sbi->blocks_per_seg; 539 unsigned int vblocks; 540 unsigned int dirty_threshold = max(am->max_candidate_count, 541 am->candidate_ratio * 542 am->victim_count / 100); 543 unsigned int cost; 544 unsigned int iter = 0; 545 int stage = 0; 546 547 if (max_mtime < min_mtime) 548 return; 549 max_mtime += 1; 550 next_stage: 551 node = lookup_central_victim(sbi, p); 552 next_node: 553 re = rb_entry_safe(node, struct rb_entry, rb_node); 554 if (!re) { 555 if (stage == 0) 556 goto skip_stage; 557 return; 558 } 559 560 ve = (struct victim_entry *)re; 561 562 if (ve->mtime >= max_mtime || ve->mtime < min_mtime) 563 goto skip_node; 564 565 age = max_mtime - ve->mtime; 566 567 vblocks = get_seg_entry(sbi, ve->segno)->ckpt_valid_blocks; 568 f2fs_bug_on(sbi, !vblocks); 569 570 /* rare case */ 571 if (vblocks == seg_blocks) 572 goto skip_node; 573 574 iter++; 575 576 age = max_mtime - abs(p->age - age); 577 cost = UINT_MAX - vblocks; 578 579 if (cost < p->min_cost || 580 (cost == p->min_cost && age > p->oldest_age)) { 581 p->min_cost = cost; 582 p->oldest_age = age; 583 p->min_segno = ve->segno; 584 } 585 skip_node: 586 if (iter < dirty_threshold) { 587 if (stage == 0) 588 node = rb_prev(node); 589 else if (stage == 1) 590 node = rb_next(node); 591 goto next_node; 592 } 593 skip_stage: 594 if (stage < 1) { 595 stage++; 596 iter = 0; 597 goto next_stage; 598 } 599 } 600 static void lookup_victim_by_age(struct f2fs_sb_info *sbi, 601 struct victim_sel_policy *p) 602 { 603 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, 604 &sbi->am.root, true)); 605 606 if (p->gc_mode == GC_AT) 607 atgc_lookup_victim(sbi, p); 608 else if (p->alloc_mode == AT_SSR) 609 atssr_lookup_victim(sbi, p); 610 else 611 f2fs_bug_on(sbi, 1); 612 } 613 614 static void release_victim_entry(struct f2fs_sb_info *sbi) 615 { 616 struct atgc_management *am = &sbi->am; 617 struct victim_entry *ve, *tmp; 618 619 list_for_each_entry_safe(ve, tmp, &am->victim_list, list) { 620 list_del(&ve->list); 621 kmem_cache_free(victim_entry_slab, ve); 622 am->victim_count--; 623 } 624 625 am->root = RB_ROOT_CACHED; 626 627 f2fs_bug_on(sbi, am->victim_count); 628 f2fs_bug_on(sbi, !list_empty(&am->victim_list)); 629 } 630 631 /* 632 * This function is called from two paths. 633 * One is garbage collection and the other is SSR segment selection. 634 * When it is called during GC, it just gets a victim segment 635 * and it does not remove it from dirty seglist. 636 * When it is called from SSR segment selection, it finds a segment 637 * which has minimum valid blocks and removes it from dirty seglist. 
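 *
 * Cost summary (see get_gc_cost()): SSR uses the count of checkpointed
 * valid blocks, greedy mode uses the raw valid block count, and
 * cost-benefit mode uses get_cb_cost(), which trades utilization off
 * against age.  For example, at age 80 a section that is 20% utilized
 * gets a benefit term of 100 * 80 * 80 / 120 ~= 5333, while an 80%
 * utilized one gets only 100 * 20 * 80 / 180 ~= 888, so the emptier
 * section ends up with the lower cost (UINT_MAX - benefit).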
638 */ 639 static int get_victim_by_default(struct f2fs_sb_info *sbi, 640 unsigned int *result, int gc_type, int type, 641 char alloc_mode, unsigned long long age) 642 { 643 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 644 struct sit_info *sm = SIT_I(sbi); 645 struct victim_sel_policy p; 646 unsigned int secno, last_victim; 647 unsigned int last_segment; 648 unsigned int nsearched; 649 bool is_atgc; 650 int ret = 0; 651 652 mutex_lock(&dirty_i->seglist_lock); 653 last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; 654 655 p.alloc_mode = alloc_mode; 656 p.age = age; 657 p.age_threshold = sbi->am.age_threshold; 658 659 retry: 660 select_policy(sbi, gc_type, type, &p); 661 p.min_segno = NULL_SEGNO; 662 p.oldest_age = 0; 663 p.min_cost = get_max_cost(sbi, &p); 664 665 is_atgc = (p.gc_mode == GC_AT || p.alloc_mode == AT_SSR); 666 nsearched = 0; 667 668 if (is_atgc) 669 SIT_I(sbi)->dirty_min_mtime = ULLONG_MAX; 670 671 if (*result != NULL_SEGNO) { 672 if (!get_valid_blocks(sbi, *result, false)) { 673 ret = -ENODATA; 674 goto out; 675 } 676 677 if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) 678 ret = -EBUSY; 679 else 680 p.min_segno = *result; 681 goto out; 682 } 683 684 ret = -ENODATA; 685 if (p.max_search == 0) 686 goto out; 687 688 if (__is_large_section(sbi) && p.alloc_mode == LFS) { 689 if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) { 690 p.min_segno = sbi->next_victim_seg[BG_GC]; 691 *result = p.min_segno; 692 sbi->next_victim_seg[BG_GC] = NULL_SEGNO; 693 goto got_result; 694 } 695 if (gc_type == FG_GC && 696 sbi->next_victim_seg[FG_GC] != NULL_SEGNO) { 697 p.min_segno = sbi->next_victim_seg[FG_GC]; 698 *result = p.min_segno; 699 sbi->next_victim_seg[FG_GC] = NULL_SEGNO; 700 goto got_result; 701 } 702 } 703 704 last_victim = sm->last_victim[p.gc_mode]; 705 if (p.alloc_mode == LFS && gc_type == FG_GC) { 706 p.min_segno = check_bg_victims(sbi); 707 if (p.min_segno != NULL_SEGNO) 708 goto got_it; 709 } 710 711 while (1) { 712 unsigned long cost, *dirty_bitmap; 713 unsigned int unit_no, segno; 714 715 dirty_bitmap = p.dirty_bitmap; 716 unit_no = find_next_bit(dirty_bitmap, 717 last_segment / p.ofs_unit, 718 p.offset / p.ofs_unit); 719 segno = unit_no * p.ofs_unit; 720 if (segno >= last_segment) { 721 if (sm->last_victim[p.gc_mode]) { 722 last_segment = 723 sm->last_victim[p.gc_mode]; 724 sm->last_victim[p.gc_mode] = 0; 725 p.offset = 0; 726 continue; 727 } 728 break; 729 } 730 731 p.offset = segno + p.ofs_unit; 732 nsearched++; 733 734 #ifdef CONFIG_F2FS_CHECK_FS 735 /* 736 * skip selecting the invalid segno (that is failed due to block 737 * validity check failure during GC) to avoid endless GC loop in 738 * such cases. 739 */ 740 if (test_bit(segno, sm->invalid_segmap)) 741 goto next; 742 #endif 743 744 secno = GET_SEC_FROM_SEG(sbi, segno); 745 746 if (sec_usage_check(sbi, secno)) 747 goto next; 748 749 /* Don't touch checkpointed data */ 750 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 751 if (p.alloc_mode == LFS) { 752 /* 753 * LFS is set to find source section during GC. 754 * The victim should have no checkpointed data. 755 */ 756 if (get_ckpt_valid_blocks(sbi, segno, true)) 757 goto next; 758 } else { 759 /* 760 * SSR | AT_SSR are set to find target segment 761 * for writes which can be full by checkpointed 762 * and newly written blocks. 
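				 * (f2fs_segment_has_free_slot() below checks
				 * that at least one block offset is free in
				 * both the current and the checkpointed
				 * validity bitmaps, so an SSR write can
				 * actually land in that segment.)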
763 */ 764 if (!f2fs_segment_has_free_slot(sbi, segno)) 765 goto next; 766 } 767 } 768 769 if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) 770 goto next; 771 772 if (is_atgc) { 773 add_victim_entry(sbi, &p, segno); 774 goto next; 775 } 776 777 cost = get_gc_cost(sbi, segno, &p); 778 779 if (p.min_cost > cost) { 780 p.min_segno = segno; 781 p.min_cost = cost; 782 } 783 next: 784 if (nsearched >= p.max_search) { 785 if (!sm->last_victim[p.gc_mode] && segno <= last_victim) 786 sm->last_victim[p.gc_mode] = 787 last_victim + p.ofs_unit; 788 else 789 sm->last_victim[p.gc_mode] = segno + p.ofs_unit; 790 sm->last_victim[p.gc_mode] %= 791 (MAIN_SECS(sbi) * sbi->segs_per_sec); 792 break; 793 } 794 } 795 796 /* get victim for GC_AT/AT_SSR */ 797 if (is_atgc) { 798 lookup_victim_by_age(sbi, &p); 799 release_victim_entry(sbi); 800 } 801 802 if (is_atgc && p.min_segno == NULL_SEGNO && 803 sm->elapsed_time < p.age_threshold) { 804 p.age_threshold = 0; 805 goto retry; 806 } 807 808 if (p.min_segno != NULL_SEGNO) { 809 got_it: 810 *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; 811 got_result: 812 if (p.alloc_mode == LFS) { 813 secno = GET_SEC_FROM_SEG(sbi, p.min_segno); 814 if (gc_type == FG_GC) 815 sbi->cur_victim_sec = secno; 816 else 817 set_bit(secno, dirty_i->victim_secmap); 818 } 819 ret = 0; 820 821 } 822 out: 823 if (p.min_segno != NULL_SEGNO) 824 trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, 825 sbi->cur_victim_sec, 826 prefree_segments(sbi), free_segments(sbi)); 827 mutex_unlock(&dirty_i->seglist_lock); 828 829 return ret; 830 } 831 832 static const struct victim_selection default_v_ops = { 833 .get_victim = get_victim_by_default, 834 }; 835 836 static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) 837 { 838 struct inode_entry *ie; 839 840 ie = radix_tree_lookup(&gc_list->iroot, ino); 841 if (ie) 842 return ie->inode; 843 return NULL; 844 } 845 846 static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) 847 { 848 struct inode_entry *new_ie; 849 850 if (inode == find_gc_inode(gc_list, inode->i_ino)) { 851 iput(inode); 852 return; 853 } 854 new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, 855 GFP_NOFS, true, NULL); 856 new_ie->inode = inode; 857 858 f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); 859 list_add_tail(&new_ie->list, &gc_list->ilist); 860 } 861 862 static void put_gc_inode(struct gc_inode_list *gc_list) 863 { 864 struct inode_entry *ie, *next_ie; 865 866 list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) { 867 radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); 868 iput(ie->inode); 869 list_del(&ie->list); 870 kmem_cache_free(f2fs_inode_entry_slab, ie); 871 } 872 } 873 874 static int check_valid_map(struct f2fs_sb_info *sbi, 875 unsigned int segno, int offset) 876 { 877 struct sit_info *sit_i = SIT_I(sbi); 878 struct seg_entry *sentry; 879 int ret; 880 881 down_read(&sit_i->sentry_lock); 882 sentry = get_seg_entry(sbi, segno); 883 ret = f2fs_test_bit(offset, sentry->cur_valid_map); 884 up_read(&sit_i->sentry_lock); 885 return ret; 886 } 887 888 /* 889 * This function compares node address got in summary with that in NAT. 890 * On validity, copy that node with cold status, otherwise (invalid node) 891 * ignore that. 
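 *
 * The segment is walked up to three times (see "phase" below): phase 0
 * issues readahead for the NAT blocks of the live nids, phase 1 issues
 * readahead for the node pages themselves, and phase 2 re-validates
 * each block and migrates it via f2fs_move_node_page().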
 */
static int gc_node_segment(struct f2fs_sb_info *sbi,
		struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;
	bool fggc = (gc_type == FG_GC);
	int submitted = 0;
	unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	if (fggc && phase == 2)
		atomic_inc(&sbi->wb_sync_req[NODE]);

	for (off = 0; off < usable_blks_in_seg; off++, entry++) {
		nid_t nid = le32_to_cpu(entry->nid);
		struct page *node_page;
		struct node_info ni;
		int err;

		/* stop BG_GC if there are not enough free sections */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
			return submitted;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			f2fs_ra_node_page(sbi, nid);
			continue;
		}

		/* phase == 2 */
		node_page = f2fs_get_node_page(sbi, nid);
		if (IS_ERR(node_page))
			continue;

		/* block may become invalid during f2fs_get_node_page */
		if (check_valid_map(sbi, segno, off) == 0) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		if (f2fs_get_node_info(sbi, nid, &ni)) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		if (ni.blk_addr != start_addr + off) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		err = f2fs_move_node_page(node_page, gc_type);
		if (!err && gc_type == FG_GC)
			submitted++;
		stat_inc_node_blk_count(sbi, 1, gc_type);
	}

	if (++phase < 3)
		goto next_step;

	if (fggc)
		atomic_dec(&sbi->wb_sync_req[NODE]);
	return submitted;
}

/*
 * Calculate the start block index that corresponds to the given node offset.
 * Note that the caller must pass the offset of a direct node block only;
 * passing the offset of an indirect or double indirect node block is a
 * caller's bug.
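 *
 * For example, with the default 4KB layout (923 data addresses in the
 * inode, 1018 addresses per direct node, 1018 nids per indirect node):
 * node_ofs 1 and 2 are the two direct nodes and map to file block 923
 * and 1941 respectively, while node_ofs 4, the first direct node under
 * the first indirect node, maps to 2 * 1018 + 923 = 2959.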
977 */ 978 block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode) 979 { 980 unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; 981 unsigned int bidx; 982 983 if (node_ofs == 0) 984 return 0; 985 986 if (node_ofs <= 2) { 987 bidx = node_ofs - 1; 988 } else if (node_ofs <= indirect_blks) { 989 int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); 990 991 bidx = node_ofs - 2 - dec; 992 } else { 993 int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); 994 995 bidx = node_ofs - 5 - dec; 996 } 997 return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode); 998 } 999 1000 static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 1001 struct node_info *dni, block_t blkaddr, unsigned int *nofs) 1002 { 1003 struct page *node_page; 1004 nid_t nid; 1005 unsigned int ofs_in_node; 1006 block_t source_blkaddr; 1007 1008 nid = le32_to_cpu(sum->nid); 1009 ofs_in_node = le16_to_cpu(sum->ofs_in_node); 1010 1011 node_page = f2fs_get_node_page(sbi, nid); 1012 if (IS_ERR(node_page)) 1013 return false; 1014 1015 if (f2fs_get_node_info(sbi, nid, dni)) { 1016 f2fs_put_page(node_page, 1); 1017 return false; 1018 } 1019 1020 if (sum->version != dni->version) { 1021 f2fs_warn(sbi, "%s: valid data with mismatched node version.", 1022 __func__); 1023 set_sbi_flag(sbi, SBI_NEED_FSCK); 1024 } 1025 1026 *nofs = ofs_of_node(node_page); 1027 source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); 1028 f2fs_put_page(node_page, 1); 1029 1030 if (source_blkaddr != blkaddr) { 1031 #ifdef CONFIG_F2FS_CHECK_FS 1032 unsigned int segno = GET_SEGNO(sbi, blkaddr); 1033 unsigned long offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 1034 1035 if (unlikely(check_valid_map(sbi, segno, offset))) { 1036 if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) { 1037 f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u", 1038 blkaddr, source_blkaddr, segno); 1039 f2fs_bug_on(sbi, 1); 1040 } 1041 } 1042 #endif 1043 return false; 1044 } 1045 return true; 1046 } 1047 1048 static int ra_data_block(struct inode *inode, pgoff_t index) 1049 { 1050 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1051 struct address_space *mapping = inode->i_mapping; 1052 struct dnode_of_data dn; 1053 struct page *page; 1054 struct extent_info ei = {0, 0, 0}; 1055 struct f2fs_io_info fio = { 1056 .sbi = sbi, 1057 .ino = inode->i_ino, 1058 .type = DATA, 1059 .temp = COLD, 1060 .op = REQ_OP_READ, 1061 .op_flags = 0, 1062 .encrypted_page = NULL, 1063 .in_list = false, 1064 .retry = false, 1065 }; 1066 int err; 1067 1068 page = f2fs_grab_cache_page(mapping, index, true); 1069 if (!page) 1070 return -ENOMEM; 1071 1072 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 1073 dn.data_blkaddr = ei.blk + index - ei.fofs; 1074 if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, 1075 DATA_GENERIC_ENHANCE_READ))) { 1076 err = -EFSCORRUPTED; 1077 goto put_page; 1078 } 1079 goto got_it; 1080 } 1081 1082 set_new_dnode(&dn, inode, NULL, NULL, 0); 1083 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 1084 if (err) 1085 goto put_page; 1086 f2fs_put_dnode(&dn); 1087 1088 if (!__is_valid_data_blkaddr(dn.data_blkaddr)) { 1089 err = -ENOENT; 1090 goto put_page; 1091 } 1092 if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, 1093 DATA_GENERIC_ENHANCE))) { 1094 err = -EFSCORRUPTED; 1095 goto put_page; 1096 } 1097 got_it: 1098 /* read page */ 1099 fio.page = page; 1100 fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; 1101 1102 /* 1103 * don't cache encrypted data into meta inode until previous dirty 1104 * 
data were writebacked to avoid racing between GC and flush. 1105 */ 1106 f2fs_wait_on_page_writeback(page, DATA, true, true); 1107 1108 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); 1109 1110 fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), 1111 dn.data_blkaddr, 1112 FGP_LOCK | FGP_CREAT, GFP_NOFS); 1113 if (!fio.encrypted_page) { 1114 err = -ENOMEM; 1115 goto put_page; 1116 } 1117 1118 err = f2fs_submit_page_bio(&fio); 1119 if (err) 1120 goto put_encrypted_page; 1121 f2fs_put_page(fio.encrypted_page, 0); 1122 f2fs_put_page(page, 1); 1123 1124 f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); 1125 f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); 1126 1127 return 0; 1128 put_encrypted_page: 1129 f2fs_put_page(fio.encrypted_page, 1); 1130 put_page: 1131 f2fs_put_page(page, 1); 1132 return err; 1133 } 1134 1135 /* 1136 * Move data block via META_MAPPING while keeping locked data page. 1137 * This can be used to move blocks, aka LBAs, directly on disk. 1138 */ 1139 static int move_data_block(struct inode *inode, block_t bidx, 1140 int gc_type, unsigned int segno, int off) 1141 { 1142 struct f2fs_io_info fio = { 1143 .sbi = F2FS_I_SB(inode), 1144 .ino = inode->i_ino, 1145 .type = DATA, 1146 .temp = COLD, 1147 .op = REQ_OP_READ, 1148 .op_flags = 0, 1149 .encrypted_page = NULL, 1150 .in_list = false, 1151 .retry = false, 1152 }; 1153 struct dnode_of_data dn; 1154 struct f2fs_summary sum; 1155 struct node_info ni; 1156 struct page *page, *mpage; 1157 block_t newaddr; 1158 int err = 0; 1159 bool lfs_mode = f2fs_lfs_mode(fio.sbi); 1160 int type = fio.sbi->am.atgc_enabled && (gc_type == BG_GC) && 1161 (fio.sbi->gc_mode != GC_URGENT_HIGH) ? 1162 CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; 1163 1164 /* do not read out */ 1165 page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); 1166 if (!page) 1167 return -ENOMEM; 1168 1169 if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { 1170 err = -ENOENT; 1171 goto out; 1172 } 1173 1174 if (f2fs_is_atomic_file(inode)) { 1175 F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; 1176 F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; 1177 err = -EAGAIN; 1178 goto out; 1179 } 1180 1181 if (f2fs_is_pinned_file(inode)) { 1182 f2fs_pin_file_control(inode, true); 1183 err = -EAGAIN; 1184 goto out; 1185 } 1186 1187 set_new_dnode(&dn, inode, NULL, NULL, 0); 1188 err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE); 1189 if (err) 1190 goto out; 1191 1192 if (unlikely(dn.data_blkaddr == NULL_ADDR)) { 1193 ClearPageUptodate(page); 1194 err = -ENOENT; 1195 goto put_out; 1196 } 1197 1198 /* 1199 * don't cache encrypted data into meta inode until previous dirty 1200 * data were writebacked to avoid racing between GC and flush. 
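	 * (The block is migrated as a raw copy through META_MAPPING(sbi)
	 * precisely because post-read processing such as decryption or
	 * decompression cannot be applied here; see ra_data_block() above.)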
1201 */ 1202 f2fs_wait_on_page_writeback(page, DATA, true, true); 1203 1204 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); 1205 1206 err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); 1207 if (err) 1208 goto put_out; 1209 1210 /* read page */ 1211 fio.page = page; 1212 fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; 1213 1214 if (lfs_mode) 1215 down_write(&fio.sbi->io_order_lock); 1216 1217 mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), 1218 fio.old_blkaddr, false); 1219 if (!mpage) { 1220 err = -ENOMEM; 1221 goto up_out; 1222 } 1223 1224 fio.encrypted_page = mpage; 1225 1226 /* read source block in mpage */ 1227 if (!PageUptodate(mpage)) { 1228 err = f2fs_submit_page_bio(&fio); 1229 if (err) { 1230 f2fs_put_page(mpage, 1); 1231 goto up_out; 1232 } 1233 1234 f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); 1235 f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); 1236 1237 lock_page(mpage); 1238 if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || 1239 !PageUptodate(mpage))) { 1240 err = -EIO; 1241 f2fs_put_page(mpage, 1); 1242 goto up_out; 1243 } 1244 } 1245 1246 set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); 1247 1248 /* allocate block address */ 1249 f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, 1250 &sum, type, NULL); 1251 1252 fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), 1253 newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); 1254 if (!fio.encrypted_page) { 1255 err = -ENOMEM; 1256 f2fs_put_page(mpage, 1); 1257 goto recover_block; 1258 } 1259 1260 /* write target block */ 1261 f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); 1262 memcpy(page_address(fio.encrypted_page), 1263 page_address(mpage), PAGE_SIZE); 1264 f2fs_put_page(mpage, 1); 1265 invalidate_mapping_pages(META_MAPPING(fio.sbi), 1266 fio.old_blkaddr, fio.old_blkaddr); 1267 f2fs_invalidate_compress_page(fio.sbi, fio.old_blkaddr); 1268 1269 set_page_dirty(fio.encrypted_page); 1270 if (clear_page_dirty_for_io(fio.encrypted_page)) 1271 dec_page_count(fio.sbi, F2FS_DIRTY_META); 1272 1273 set_page_writeback(fio.encrypted_page); 1274 ClearPageError(page); 1275 1276 fio.op = REQ_OP_WRITE; 1277 fio.op_flags = REQ_SYNC; 1278 fio.new_blkaddr = newaddr; 1279 f2fs_submit_page_write(&fio); 1280 if (fio.retry) { 1281 err = -EAGAIN; 1282 if (PageWriteback(fio.encrypted_page)) 1283 end_page_writeback(fio.encrypted_page); 1284 goto put_page_out; 1285 } 1286 1287 f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE); 1288 1289 f2fs_update_data_blkaddr(&dn, newaddr); 1290 set_inode_flag(inode, FI_APPEND_WRITE); 1291 if (page->index == 0) 1292 set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); 1293 put_page_out: 1294 f2fs_put_page(fio.encrypted_page, 1); 1295 recover_block: 1296 if (err) 1297 f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, 1298 true, true, true); 1299 up_out: 1300 if (lfs_mode) 1301 up_write(&fio.sbi->io_order_lock); 1302 put_out: 1303 f2fs_put_dnode(&dn); 1304 out: 1305 f2fs_put_page(page, 1); 1306 return err; 1307 } 1308 1309 static int move_data_page(struct inode *inode, block_t bidx, int gc_type, 1310 unsigned int segno, int off) 1311 { 1312 struct page *page; 1313 int err = 0; 1314 1315 page = f2fs_get_lock_data_page(inode, bidx, true); 1316 if (IS_ERR(page)) 1317 return PTR_ERR(page); 1318 1319 if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { 1320 err = -ENOENT; 1321 goto out; 1322 } 1323 1324 if (f2fs_is_atomic_file(inode)) { 1325 F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; 1326 
F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; 1327 err = -EAGAIN; 1328 goto out; 1329 } 1330 if (f2fs_is_pinned_file(inode)) { 1331 if (gc_type == FG_GC) 1332 f2fs_pin_file_control(inode, true); 1333 err = -EAGAIN; 1334 goto out; 1335 } 1336 1337 if (gc_type == BG_GC) { 1338 if (PageWriteback(page)) { 1339 err = -EAGAIN; 1340 goto out; 1341 } 1342 set_page_dirty(page); 1343 set_page_private_gcing(page); 1344 } else { 1345 struct f2fs_io_info fio = { 1346 .sbi = F2FS_I_SB(inode), 1347 .ino = inode->i_ino, 1348 .type = DATA, 1349 .temp = COLD, 1350 .op = REQ_OP_WRITE, 1351 .op_flags = REQ_SYNC, 1352 .old_blkaddr = NULL_ADDR, 1353 .page = page, 1354 .encrypted_page = NULL, 1355 .need_lock = LOCK_REQ, 1356 .io_type = FS_GC_DATA_IO, 1357 }; 1358 bool is_dirty = PageDirty(page); 1359 1360 retry: 1361 f2fs_wait_on_page_writeback(page, DATA, true, true); 1362 1363 set_page_dirty(page); 1364 if (clear_page_dirty_for_io(page)) { 1365 inode_dec_dirty_pages(inode); 1366 f2fs_remove_dirty_inode(inode); 1367 } 1368 1369 set_page_private_gcing(page); 1370 1371 err = f2fs_do_write_data_page(&fio); 1372 if (err) { 1373 clear_page_private_gcing(page); 1374 if (err == -ENOMEM) { 1375 congestion_wait(BLK_RW_ASYNC, 1376 DEFAULT_IO_TIMEOUT); 1377 goto retry; 1378 } 1379 if (is_dirty) 1380 set_page_dirty(page); 1381 } 1382 } 1383 out: 1384 f2fs_put_page(page, 1); 1385 return err; 1386 } 1387 1388 /* 1389 * This function tries to get parent node of victim data block, and identifies 1390 * data block validity. If the block is valid, copy that with cold status and 1391 * modify parent node. 1392 * If the parent node is not valid or the data block address is different, 1393 * the victim data block is ignored. 1394 */ 1395 static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 1396 struct gc_inode_list *gc_list, unsigned int segno, int gc_type, 1397 bool force_migrate) 1398 { 1399 struct super_block *sb = sbi->sb; 1400 struct f2fs_summary *entry; 1401 block_t start_addr; 1402 int off; 1403 int phase = 0; 1404 int submitted = 0; 1405 unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno); 1406 1407 start_addr = START_BLOCK(sbi, segno); 1408 1409 next_step: 1410 entry = sum; 1411 1412 for (off = 0; off < usable_blks_in_seg; off++, entry++) { 1413 struct page *data_page; 1414 struct inode *inode; 1415 struct node_info dni; /* dnode info for the data */ 1416 unsigned int ofs_in_node, nofs; 1417 block_t start_bidx; 1418 nid_t nid = le32_to_cpu(entry->nid); 1419 1420 /* 1421 * stop BG_GC if there is not enough free sections. 1422 * Or, stop GC if the segment becomes fully valid caused by 1423 * race condition along with SSR block allocation. 
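		 * (force_migrate, set for example by the resize path, skips
		 * the fully-valid check so the section is drained even when
		 * every block in it is still valid.)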
1424 */ 1425 if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || 1426 (!force_migrate && get_valid_blocks(sbi, segno, true) == 1427 BLKS_PER_SEC(sbi))) 1428 return submitted; 1429 1430 if (check_valid_map(sbi, segno, off) == 0) 1431 continue; 1432 1433 if (phase == 0) { 1434 f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, 1435 META_NAT, true); 1436 continue; 1437 } 1438 1439 if (phase == 1) { 1440 f2fs_ra_node_page(sbi, nid); 1441 continue; 1442 } 1443 1444 /* Get an inode by ino with checking validity */ 1445 if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs)) 1446 continue; 1447 1448 if (phase == 2) { 1449 f2fs_ra_node_page(sbi, dni.ino); 1450 continue; 1451 } 1452 1453 ofs_in_node = le16_to_cpu(entry->ofs_in_node); 1454 1455 if (phase == 3) { 1456 inode = f2fs_iget(sb, dni.ino); 1457 if (IS_ERR(inode) || is_bad_inode(inode)) 1458 continue; 1459 1460 if (!down_write_trylock( 1461 &F2FS_I(inode)->i_gc_rwsem[WRITE])) { 1462 iput(inode); 1463 sbi->skipped_gc_rwsem++; 1464 continue; 1465 } 1466 1467 start_bidx = f2fs_start_bidx_of_node(nofs, inode) + 1468 ofs_in_node; 1469 1470 if (f2fs_post_read_required(inode)) { 1471 int err = ra_data_block(inode, start_bidx); 1472 1473 up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1474 if (err) { 1475 iput(inode); 1476 continue; 1477 } 1478 add_gc_inode(gc_list, inode); 1479 continue; 1480 } 1481 1482 data_page = f2fs_get_read_data_page(inode, 1483 start_bidx, REQ_RAHEAD, true); 1484 up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 1485 if (IS_ERR(data_page)) { 1486 iput(inode); 1487 continue; 1488 } 1489 1490 f2fs_put_page(data_page, 0); 1491 add_gc_inode(gc_list, inode); 1492 continue; 1493 } 1494 1495 /* phase 4 */ 1496 inode = find_gc_inode(gc_list, dni.ino); 1497 if (inode) { 1498 struct f2fs_inode_info *fi = F2FS_I(inode); 1499 bool locked = false; 1500 int err; 1501 1502 if (S_ISREG(inode->i_mode)) { 1503 if (!down_write_trylock(&fi->i_gc_rwsem[READ])) { 1504 sbi->skipped_gc_rwsem++; 1505 continue; 1506 } 1507 if (!down_write_trylock( 1508 &fi->i_gc_rwsem[WRITE])) { 1509 sbi->skipped_gc_rwsem++; 1510 up_write(&fi->i_gc_rwsem[READ]); 1511 continue; 1512 } 1513 locked = true; 1514 1515 /* wait for all inflight aio data */ 1516 inode_dio_wait(inode); 1517 } 1518 1519 start_bidx = f2fs_start_bidx_of_node(nofs, inode) 1520 + ofs_in_node; 1521 if (f2fs_post_read_required(inode)) 1522 err = move_data_block(inode, start_bidx, 1523 gc_type, segno, off); 1524 else 1525 err = move_data_page(inode, start_bidx, gc_type, 1526 segno, off); 1527 1528 if (!err && (gc_type == FG_GC || 1529 f2fs_post_read_required(inode))) 1530 submitted++; 1531 1532 if (locked) { 1533 up_write(&fi->i_gc_rwsem[WRITE]); 1534 up_write(&fi->i_gc_rwsem[READ]); 1535 } 1536 1537 stat_inc_data_blk_count(sbi, 1, gc_type); 1538 } 1539 } 1540 1541 if (++phase < 5) 1542 goto next_step; 1543 1544 return submitted; 1545 } 1546 1547 static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, 1548 int gc_type) 1549 { 1550 struct sit_info *sit_i = SIT_I(sbi); 1551 int ret; 1552 1553 down_write(&sit_i->sentry_lock); 1554 ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, 1555 NO_CHECK_TYPE, LFS, 0); 1556 up_write(&sit_i->sentry_lock); 1557 return ret; 1558 } 1559 1560 static int do_garbage_collect(struct f2fs_sb_info *sbi, 1561 unsigned int start_segno, 1562 struct gc_inode_list *gc_list, int gc_type, 1563 bool force_migrate) 1564 { 1565 struct page *sum_page; 1566 struct f2fs_summary_block *sum; 1567 struct blk_plug plug; 1568 unsigned int segno = start_segno; 
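	/*
	 * One call walks at most one section (segs_per_sec segments).  On
	 * large-section layouts, background GC additionally caps the number
	 * of segments migrated per call at sbi->migration_granularity (see
	 * the "skip" path below).
	 */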
1569 unsigned int end_segno = start_segno + sbi->segs_per_sec; 1570 int seg_freed = 0, migrated = 0; 1571 unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? 1572 SUM_TYPE_DATA : SUM_TYPE_NODE; 1573 int submitted = 0; 1574 1575 if (__is_large_section(sbi)) 1576 end_segno = rounddown(end_segno, sbi->segs_per_sec); 1577 1578 /* 1579 * zone-capacity can be less than zone-size in zoned devices, 1580 * resulting in less than expected usable segments in the zone, 1581 * calculate the end segno in the zone which can be garbage collected 1582 */ 1583 if (f2fs_sb_has_blkzoned(sbi)) 1584 end_segno -= sbi->segs_per_sec - 1585 f2fs_usable_segs_in_sec(sbi, segno); 1586 1587 sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type); 1588 1589 /* readahead multi ssa blocks those have contiguous address */ 1590 if (__is_large_section(sbi)) 1591 f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), 1592 end_segno - segno, META_SSA, true); 1593 1594 /* reference all summary page */ 1595 while (segno < end_segno) { 1596 sum_page = f2fs_get_sum_page(sbi, segno++); 1597 if (IS_ERR(sum_page)) { 1598 int err = PTR_ERR(sum_page); 1599 1600 end_segno = segno - 1; 1601 for (segno = start_segno; segno < end_segno; segno++) { 1602 sum_page = find_get_page(META_MAPPING(sbi), 1603 GET_SUM_BLOCK(sbi, segno)); 1604 f2fs_put_page(sum_page, 0); 1605 f2fs_put_page(sum_page, 0); 1606 } 1607 return err; 1608 } 1609 unlock_page(sum_page); 1610 } 1611 1612 blk_start_plug(&plug); 1613 1614 for (segno = start_segno; segno < end_segno; segno++) { 1615 1616 /* find segment summary of victim */ 1617 sum_page = find_get_page(META_MAPPING(sbi), 1618 GET_SUM_BLOCK(sbi, segno)); 1619 f2fs_put_page(sum_page, 0); 1620 1621 if (get_valid_blocks(sbi, segno, false) == 0) 1622 goto freed; 1623 if (gc_type == BG_GC && __is_large_section(sbi) && 1624 migrated >= sbi->migration_granularity) 1625 goto skip; 1626 if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) 1627 goto skip; 1628 1629 sum = page_address(sum_page); 1630 if (type != GET_SUM_TYPE((&sum->footer))) { 1631 f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", 1632 segno, type, GET_SUM_TYPE((&sum->footer))); 1633 set_sbi_flag(sbi, SBI_NEED_FSCK); 1634 f2fs_stop_checkpoint(sbi, false); 1635 goto skip; 1636 } 1637 1638 /* 1639 * this is to avoid deadlock: 1640 * - lock_page(sum_page) - f2fs_replace_block 1641 * - check_valid_map() - down_write(sentry_lock) 1642 * - down_read(sentry_lock) - change_curseg() 1643 * - lock_page(sum_page) 1644 */ 1645 if (type == SUM_TYPE_NODE) 1646 submitted += gc_node_segment(sbi, sum->entries, segno, 1647 gc_type); 1648 else 1649 submitted += gc_data_segment(sbi, sum->entries, gc_list, 1650 segno, gc_type, 1651 force_migrate); 1652 1653 stat_inc_seg_count(sbi, type, gc_type); 1654 sbi->gc_reclaimed_segs[sbi->gc_mode]++; 1655 migrated++; 1656 1657 freed: 1658 if (gc_type == FG_GC && 1659 get_valid_blocks(sbi, segno, false) == 0) 1660 seg_freed++; 1661 1662 if (__is_large_section(sbi) && segno + 1 < end_segno) 1663 sbi->next_victim_seg[gc_type] = segno + 1; 1664 skip: 1665 f2fs_put_page(sum_page, 0); 1666 } 1667 1668 if (submitted) 1669 f2fs_submit_merged_write(sbi, 1670 (type == SUM_TYPE_NODE) ? NODE : DATA); 1671 1672 blk_finish_plug(&plug); 1673 1674 stat_inc_call_count(sbi->stat_info); 1675 1676 return seg_freed; 1677 } 1678 1679 int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, 1680 bool background, bool force, unsigned int segno) 1681 { 1682 int gc_type = sync ? 
FG_GC : BG_GC; 1683 int sec_freed = 0, seg_freed = 0, total_freed = 0; 1684 int ret = 0; 1685 struct cp_control cpc; 1686 unsigned int init_segno = segno; 1687 struct gc_inode_list gc_list = { 1688 .ilist = LIST_HEAD_INIT(gc_list.ilist), 1689 .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), 1690 }; 1691 unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC]; 1692 unsigned long long first_skipped; 1693 unsigned int skipped_round = 0, round = 0; 1694 1695 trace_f2fs_gc_begin(sbi->sb, sync, background, 1696 get_pages(sbi, F2FS_DIRTY_NODES), 1697 get_pages(sbi, F2FS_DIRTY_DENTS), 1698 get_pages(sbi, F2FS_DIRTY_IMETA), 1699 free_sections(sbi), 1700 free_segments(sbi), 1701 reserved_segments(sbi), 1702 prefree_segments(sbi)); 1703 1704 cpc.reason = __get_cp_reason(sbi); 1705 sbi->skipped_gc_rwsem = 0; 1706 first_skipped = last_skipped; 1707 gc_more: 1708 if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) { 1709 ret = -EINVAL; 1710 goto stop; 1711 } 1712 if (unlikely(f2fs_cp_error(sbi))) { 1713 ret = -EIO; 1714 goto stop; 1715 } 1716 1717 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) { 1718 /* 1719 * For example, if there are many prefree_segments below given 1720 * threshold, we can make them free by checkpoint. Then, we 1721 * secure free segments which doesn't need fggc any more. 1722 */ 1723 if (prefree_segments(sbi) && 1724 !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 1725 ret = f2fs_write_checkpoint(sbi, &cpc); 1726 if (ret) 1727 goto stop; 1728 } 1729 if (has_not_enough_free_secs(sbi, 0, 0)) 1730 gc_type = FG_GC; 1731 } 1732 1733 /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */ 1734 if (gc_type == BG_GC && !background) { 1735 ret = -EINVAL; 1736 goto stop; 1737 } 1738 ret = __get_victim(sbi, &segno, gc_type); 1739 if (ret) 1740 goto stop; 1741 1742 seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force); 1743 if (gc_type == FG_GC && 1744 seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) 1745 sec_freed++; 1746 total_freed += seg_freed; 1747 1748 if (gc_type == FG_GC) { 1749 if (sbi->skipped_atomic_files[FG_GC] > last_skipped || 1750 sbi->skipped_gc_rwsem) 1751 skipped_round++; 1752 last_skipped = sbi->skipped_atomic_files[FG_GC]; 1753 round++; 1754 } 1755 1756 if (gc_type == FG_GC) 1757 sbi->cur_victim_sec = NULL_SEGNO; 1758 1759 if (sync) 1760 goto stop; 1761 1762 if (has_not_enough_free_secs(sbi, sec_freed, 0)) { 1763 if (skipped_round <= MAX_SKIP_GC_COUNT || 1764 skipped_round * 2 < round) { 1765 segno = NULL_SEGNO; 1766 goto gc_more; 1767 } 1768 1769 if (first_skipped < last_skipped && 1770 (last_skipped - first_skipped) > 1771 sbi->skipped_gc_rwsem) { 1772 f2fs_drop_inmem_pages_all(sbi, true); 1773 segno = NULL_SEGNO; 1774 goto gc_more; 1775 } 1776 if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) 1777 ret = f2fs_write_checkpoint(sbi, &cpc); 1778 } 1779 stop: 1780 SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0; 1781 SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno; 1782 1783 trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed, 1784 get_pages(sbi, F2FS_DIRTY_NODES), 1785 get_pages(sbi, F2FS_DIRTY_DENTS), 1786 get_pages(sbi, F2FS_DIRTY_IMETA), 1787 free_sections(sbi), 1788 free_segments(sbi), 1789 reserved_segments(sbi), 1790 prefree_segments(sbi)); 1791 1792 up_write(&sbi->gc_lock); 1793 1794 put_gc_inode(&gc_list); 1795 1796 if (sync && !ret) 1797 ret = sec_freed ? 
0 : -EAGAIN; 1798 return ret; 1799 } 1800 1801 int __init f2fs_create_garbage_collection_cache(void) 1802 { 1803 victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry", 1804 sizeof(struct victim_entry)); 1805 if (!victim_entry_slab) 1806 return -ENOMEM; 1807 return 0; 1808 } 1809 1810 void f2fs_destroy_garbage_collection_cache(void) 1811 { 1812 kmem_cache_destroy(victim_entry_slab); 1813 } 1814 1815 static void init_atgc_management(struct f2fs_sb_info *sbi) 1816 { 1817 struct atgc_management *am = &sbi->am; 1818 1819 if (test_opt(sbi, ATGC) && 1820 SIT_I(sbi)->elapsed_time >= DEF_GC_THREAD_AGE_THRESHOLD) 1821 am->atgc_enabled = true; 1822 1823 am->root = RB_ROOT_CACHED; 1824 INIT_LIST_HEAD(&am->victim_list); 1825 am->victim_count = 0; 1826 1827 am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO; 1828 am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT; 1829 am->age_weight = DEF_GC_THREAD_AGE_WEIGHT; 1830 am->age_threshold = DEF_GC_THREAD_AGE_THRESHOLD; 1831 } 1832 1833 void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) 1834 { 1835 DIRTY_I(sbi)->v_ops = &default_v_ops; 1836 1837 sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; 1838 1839 /* give warm/cold data area from slower device */ 1840 if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi)) 1841 SIT_I(sbi)->last_victim[ALLOC_NEXT] = 1842 GET_SEGNO(sbi, FDEV(0).end_blk) + 1; 1843 1844 init_atgc_management(sbi); 1845 } 1846 1847 static int free_segment_range(struct f2fs_sb_info *sbi, 1848 unsigned int secs, bool gc_only) 1849 { 1850 unsigned int segno, next_inuse, start, end; 1851 struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; 1852 int gc_mode, gc_type; 1853 int err = 0; 1854 int type; 1855 1856 /* Force block allocation for GC */ 1857 MAIN_SECS(sbi) -= secs; 1858 start = MAIN_SECS(sbi) * sbi->segs_per_sec; 1859 end = MAIN_SEGS(sbi) - 1; 1860 1861 mutex_lock(&DIRTY_I(sbi)->seglist_lock); 1862 for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) 1863 if (SIT_I(sbi)->last_victim[gc_mode] >= start) 1864 SIT_I(sbi)->last_victim[gc_mode] = 0; 1865 1866 for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) 1867 if (sbi->next_victim_seg[gc_type] >= start) 1868 sbi->next_victim_seg[gc_type] = NULL_SEGNO; 1869 mutex_unlock(&DIRTY_I(sbi)->seglist_lock); 1870 1871 /* Move out cursegs from the target range */ 1872 for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) 1873 f2fs_allocate_segment_for_resize(sbi, type, start, end); 1874 1875 /* do GC to move out valid blocks in the range */ 1876 for (segno = start; segno <= end; segno += sbi->segs_per_sec) { 1877 struct gc_inode_list gc_list = { 1878 .ilist = LIST_HEAD_INIT(gc_list.ilist), 1879 .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), 1880 }; 1881 1882 do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); 1883 put_gc_inode(&gc_list); 1884 1885 if (!gc_only && get_valid_blocks(sbi, segno, true)) { 1886 err = -EAGAIN; 1887 goto out; 1888 } 1889 if (fatal_signal_pending(current)) { 1890 err = -ERESTARTSYS; 1891 goto out; 1892 } 1893 } 1894 if (gc_only) 1895 goto out; 1896 1897 err = f2fs_write_checkpoint(sbi, &cpc); 1898 if (err) 1899 goto out; 1900 1901 next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); 1902 if (next_inuse <= end) { 1903 f2fs_err(sbi, "segno %u should be free but still inuse!", 1904 next_inuse); 1905 f2fs_bug_on(sbi, 1); 1906 } 1907 out: 1908 MAIN_SECS(sbi) += secs; 1909 return err; 1910 } 1911 1912 static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) 1913 { 1914 struct f2fs_super_block *raw_sb = 
F2FS_RAW_SUPER(sbi); 1915 int section_count; 1916 int segment_count; 1917 int segment_count_main; 1918 long long block_count; 1919 int segs = secs * sbi->segs_per_sec; 1920 1921 down_write(&sbi->sb_lock); 1922 1923 section_count = le32_to_cpu(raw_sb->section_count); 1924 segment_count = le32_to_cpu(raw_sb->segment_count); 1925 segment_count_main = le32_to_cpu(raw_sb->segment_count_main); 1926 block_count = le64_to_cpu(raw_sb->block_count); 1927 1928 raw_sb->section_count = cpu_to_le32(section_count + secs); 1929 raw_sb->segment_count = cpu_to_le32(segment_count + segs); 1930 raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); 1931 raw_sb->block_count = cpu_to_le64(block_count + 1932 (long long)segs * sbi->blocks_per_seg); 1933 if (f2fs_is_multi_device(sbi)) { 1934 int last_dev = sbi->s_ndevs - 1; 1935 int dev_segs = 1936 le32_to_cpu(raw_sb->devs[last_dev].total_segments); 1937 1938 raw_sb->devs[last_dev].total_segments = 1939 cpu_to_le32(dev_segs + segs); 1940 } 1941 1942 up_write(&sbi->sb_lock); 1943 } 1944 1945 static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) 1946 { 1947 int segs = secs * sbi->segs_per_sec; 1948 long long blks = (long long)segs * sbi->blocks_per_seg; 1949 long long user_block_count = 1950 le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); 1951 1952 SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; 1953 MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; 1954 MAIN_SECS(sbi) += secs; 1955 FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; 1956 FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; 1957 F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); 1958 1959 if (f2fs_is_multi_device(sbi)) { 1960 int last_dev = sbi->s_ndevs - 1; 1961 1962 FDEV(last_dev).total_segments = 1963 (int)FDEV(last_dev).total_segments + segs; 1964 FDEV(last_dev).end_blk = 1965 (long long)FDEV(last_dev).end_blk + blks; 1966 #ifdef CONFIG_BLK_DEV_ZONED 1967 FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz + 1968 (int)(blks >> sbi->log_blocks_per_blkz); 1969 #endif 1970 } 1971 } 1972 1973 int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) 1974 { 1975 __u64 old_block_count, shrunk_blocks; 1976 struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; 1977 unsigned int secs; 1978 int err = 0; 1979 __u32 rem; 1980 1981 old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count); 1982 if (block_count > old_block_count) 1983 return -EINVAL; 1984 1985 if (f2fs_is_multi_device(sbi)) { 1986 int last_dev = sbi->s_ndevs - 1; 1987 __u64 last_segs = FDEV(last_dev).total_segments; 1988 1989 if (block_count + last_segs * sbi->blocks_per_seg <= 1990 old_block_count) 1991 return -EINVAL; 1992 } 1993 1994 /* new fs size should align to section size */ 1995 div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); 1996 if (rem) 1997 return -EINVAL; 1998 1999 if (block_count == old_block_count) 2000 return 0; 2001 2002 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { 2003 f2fs_err(sbi, "Should run fsck to repair first."); 2004 return -EFSCORRUPTED; 2005 } 2006 2007 if (test_opt(sbi, DISABLE_CHECKPOINT)) { 2008 f2fs_err(sbi, "Checkpoint should be enabled."); 2009 return -EINVAL; 2010 } 2011 2012 shrunk_blocks = old_block_count - block_count; 2013 secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); 2014 2015 /* stop other GC */ 2016 if (!down_write_trylock(&sbi->gc_lock)) 2017 return -EAGAIN; 2018 2019 /* stop CP to protect MAIN_SEC in free_segment_range */ 2020 f2fs_lock_op(sbi); 2021 2022 spin_lock(&sbi->stat_lock); 2023 if 
(shrunk_blocks + valid_user_blocks(sbi) + 2024 sbi->current_reserved_blocks + sbi->unusable_block_count + 2025 F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count) 2026 err = -ENOSPC; 2027 spin_unlock(&sbi->stat_lock); 2028 2029 if (err) 2030 goto out_unlock; 2031 2032 err = free_segment_range(sbi, secs, true); 2033 2034 out_unlock: 2035 f2fs_unlock_op(sbi); 2036 up_write(&sbi->gc_lock); 2037 if (err) 2038 return err; 2039 2040 set_sbi_flag(sbi, SBI_IS_RESIZEFS); 2041 2042 freeze_super(sbi->sb); 2043 down_write(&sbi->gc_lock); 2044 down_write(&sbi->cp_global_sem); 2045 2046 spin_lock(&sbi->stat_lock); 2047 if (shrunk_blocks + valid_user_blocks(sbi) + 2048 sbi->current_reserved_blocks + sbi->unusable_block_count + 2049 F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count) 2050 err = -ENOSPC; 2051 else 2052 sbi->user_block_count -= shrunk_blocks; 2053 spin_unlock(&sbi->stat_lock); 2054 if (err) 2055 goto out_err; 2056 2057 err = free_segment_range(sbi, secs, false); 2058 if (err) 2059 goto recover_out; 2060 2061 update_sb_metadata(sbi, -secs); 2062 2063 err = f2fs_commit_super(sbi, false); 2064 if (err) { 2065 update_sb_metadata(sbi, secs); 2066 goto recover_out; 2067 } 2068 2069 update_fs_metadata(sbi, -secs); 2070 clear_sbi_flag(sbi, SBI_IS_RESIZEFS); 2071 set_sbi_flag(sbi, SBI_IS_DIRTY); 2072 2073 err = f2fs_write_checkpoint(sbi, &cpc); 2074 if (err) { 2075 update_fs_metadata(sbi, secs); 2076 update_sb_metadata(sbi, secs); 2077 f2fs_commit_super(sbi, false); 2078 } 2079 recover_out: 2080 if (err) { 2081 set_sbi_flag(sbi, SBI_NEED_FSCK); 2082 f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); 2083 2084 spin_lock(&sbi->stat_lock); 2085 sbi->user_block_count += shrunk_blocks; 2086 spin_unlock(&sbi->stat_lock); 2087 } 2088 out_err: 2089 up_write(&sbi->cp_global_sem); 2090 up_write(&sbi->gc_lock); 2091 thaw_super(sbi->sb); 2092 clear_sbi_flag(sbi, SBI_IS_RESIZEFS); 2093 return err; 2094 } 2095