// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/gc.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 * http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "gc.h"
#include <trace/events/f2fs.h>

static int gc_thread_func(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
	unsigned int wait_ms;

	wait_ms = gc_th->min_sleep_time;

	set_freezable();
	do {
		bool sync_mode;

		wait_event_interruptible_timeout(*wq,
				kthread_should_stop() || freezing(current) ||
				gc_th->gc_wake,
				msecs_to_jiffies(wait_ms));

		/* give it a try one time */
		if (gc_th->gc_wake)
			gc_th->gc_wake = 0;

		if (try_to_freeze()) {
			stat_other_skip_bggc_count(sbi);
			continue;
		}
		if (kthread_should_stop())
			break;

		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
			increase_sleep_time(gc_th, &wait_ms);
			stat_other_skip_bggc_count(sbi);
			continue;
		}

		if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
			f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
			f2fs_stop_checkpoint(sbi, false);
		}

		if (!sb_start_write_trylock(sbi->sb)) {
			stat_other_skip_bggc_count(sbi);
			continue;
		}

		/*
		 * [GC triggering condition]
		 * 0. GC is not conducted currently.
		 * 1. There are enough dirty segments.
		 * 2. IO subsystem is idle by checking the # of writeback pages.
		 * 3. IO subsystem is idle by checking the # of requests in
		 *    bdev's request list.
		 *
		 * Note) We have to avoid triggering GCs too frequently,
		 * because some segments may be invalidated soon afterwards by
		 * user updates or deletions.
		 * So we wait some time to collect more dirty segments.
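		 *
		 * wait_ms starts at min_sleep_time; it is pushed back toward
		 * min_sleep_time while enough invalid blocks exist, raised
		 * toward max_sleep_time while the device is busy, forced to
		 * urgent_sleep_time in GC_URGENT mode, and set to
		 * no_gc_sleep_time when no victim could be selected.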
		 */
		if (sbi->gc_mode == GC_URGENT) {
			wait_ms = gc_th->urgent_sleep_time;
			down_write(&sbi->gc_lock);
			goto do_gc;
		}

		if (!down_write_trylock(&sbi->gc_lock)) {
			stat_other_skip_bggc_count(sbi);
			goto next;
		}

		if (!is_idle(sbi, GC_TIME)) {
			increase_sleep_time(gc_th, &wait_ms);
			up_write(&sbi->gc_lock);
			stat_io_skip_bggc_count(sbi);
			goto next;
		}

		if (has_enough_invalid_blocks(sbi))
			decrease_sleep_time(gc_th, &wait_ms);
		else
			increase_sleep_time(gc_th, &wait_ms);
do_gc:
		stat_inc_bggc_count(sbi->stat_info);

		sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;

		/* if return value is not zero, no victim was selected */
		if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO))
			wait_ms = gc_th->no_gc_sleep_time;

		trace_f2fs_background_gc(sbi->sb, wait_ms,
				prefree_segments(sbi), free_segments(sbi));

		/* balancing f2fs's metadata periodically */
		f2fs_balance_fs_bg(sbi, true);
next:
		sb_end_write(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}

int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th;
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	int err = 0;

	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
	if (!gc_th) {
		err = -ENOMEM;
		goto out;
	}

	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
	gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
	gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;

	gc_th->gc_wake = 0;

	sbi->gc_thread = gc_th;
	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(gc_th->f2fs_gc_task)) {
		err = PTR_ERR(gc_th->f2fs_gc_task);
		kvfree(gc_th);
		sbi->gc_thread = NULL;
	}
out:
	return err;
}

void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;

	if (!gc_th)
		return;
	kthread_stop(gc_th->f2fs_gc_task);
	kvfree(gc_th);
	sbi->gc_thread = NULL;
}

static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
{
	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;

	switch (sbi->gc_mode) {
	case GC_IDLE_CB:
		gc_mode = GC_CB;
		break;
	case GC_IDLE_GREEDY:
	case GC_URGENT:
		gc_mode = GC_GREEDY;
		break;
	}
	return gc_mode;
}

static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
			int type, struct victim_sel_policy *p)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (p->alloc_mode == SSR) {
		p->gc_mode = GC_GREEDY;
		p->dirty_segmap = dirty_i->dirty_segmap[type];
		p->max_search = dirty_i->nr_dirty[type];
		p->ofs_unit = 1;
	} else {
		p->gc_mode = select_gc_type(sbi, gc_type);
		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
		p->max_search = dirty_i->nr_dirty[DIRTY];
		p->ofs_unit = sbi->segs_per_sec;
	}

	/*
	 * Adjust the candidate range: all dirty segments should be selectable
	 * in the foreground GC and urgent GC cases.
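	 *
	 * Only background, non-urgent GC caps the scan at max_victim_search
	 * (exposed as a sysfs tunable); FG_GC and GC_URGENT scan the whole
	 * dirty list.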
	 */
	if (gc_type != FG_GC &&
			(sbi->gc_mode != GC_URGENT) &&
			p->max_search > sbi->max_victim_search)
		p->max_search = sbi->max_victim_search;

	/* let's select beginning hot/small space first in no_heap mode */
	if (test_opt(sbi, NOHEAP) &&
		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
		p->offset = 0;
	else
		p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
}

static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
				struct victim_sel_policy *p)
{
	/* SSR allocates in a segment unit */
	if (p->alloc_mode == SSR)
		return sbi->blocks_per_seg;
	if (p->gc_mode == GC_GREEDY)
		return 2 * sbi->blocks_per_seg * p->ofs_unit;
	else if (p->gc_mode == GC_CB)
		return UINT_MAX;
	else /* No other gc_mode */
		return 0;
}

static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int secno;

	/*
	 * If the gc_type is FG_GC, we can reuse victim sections that were
	 * selected by background GC before.
	 * Those sections are guaranteed to have few valid blocks.
	 */
	for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
		if (sec_usage_check(sbi, secno))
			continue;
		clear_bit(secno, dirty_i->victim_secmap);
		return GET_SEG_FROM_SEC(sbi, secno);
	}
	return NULL_SEGNO;
}

static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
	unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
	unsigned long long mtime = 0;
	unsigned int vblocks;
	unsigned char age = 0;
	unsigned char u;
	unsigned int i;

	for (i = 0; i < sbi->segs_per_sec; i++)
		mtime += get_seg_entry(sbi, start + i)->mtime;
	vblocks = get_valid_blocks(sbi, segno, true);

	mtime = div_u64(mtime, sbi->segs_per_sec);
	vblocks = div_u64(vblocks, sbi->segs_per_sec);

	u = (vblocks * 100) >> sbi->log_blocks_per_seg;

	/* Handle the case where the system time was changed by the user */
	if (mtime < sit_i->min_mtime)
		sit_i->min_mtime = mtime;
	if (mtime > sit_i->max_mtime)
		sit_i->max_mtime = mtime;
	if (sit_i->max_mtime != sit_i->min_mtime)
		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
				sit_i->max_mtime - sit_i->min_mtime);

	/* cost-benefit: older sections with fewer valid blocks get a lower cost */
	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}

static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
			unsigned int segno, struct victim_sel_policy *p)
{
	if (p->alloc_mode == SSR)
		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;

	/* alloc_mode == LFS */
	if (p->gc_mode == GC_GREEDY)
		return get_valid_blocks(sbi, segno, true);
	else
		return get_cb_cost(sbi, segno);
}

static unsigned int count_bits(const unsigned long *addr,
				unsigned int offset, unsigned int len)
{
	unsigned int end = offset + len, sum = 0;

	while (offset < end) {
		if (test_bit(offset++, addr))
			++sum;
	}
	return sum;
}

/*
 * This function is called from two paths.
 * One is garbage collection and the other is SSR segment selection.
 * When it is called during GC, it just gets a victim segment
 * and does not remove it from the dirty seglist.
 * When it is called from SSR segment selection, it finds the segment
 * with the minimum number of valid blocks and removes it from the dirty
 * seglist.
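 *
 * On success it returns 1 and stores the selected segment number in *result;
 * it returns 0 when no victim could be found.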
 */
static int get_victim_by_default(struct f2fs_sb_info *sbi,
			unsigned int *result, int gc_type, int type,
			char alloc_mode)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct sit_info *sm = SIT_I(sbi);
	struct victim_sel_policy p;
	unsigned int secno, last_victim;
	unsigned int last_segment;
	unsigned int nsearched = 0;

	mutex_lock(&dirty_i->seglist_lock);
	last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;

	p.alloc_mode = alloc_mode;
	select_policy(sbi, gc_type, type, &p);

	p.min_segno = NULL_SEGNO;
	p.min_cost = get_max_cost(sbi, &p);

	if (*result != NULL_SEGNO) {
		if (get_valid_blocks(sbi, *result, false) &&
			!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
			p.min_segno = *result;
		goto out;
	}

	if (p.max_search == 0)
		goto out;

	if (__is_large_section(sbi) && p.alloc_mode == LFS) {
		if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) {
			p.min_segno = sbi->next_victim_seg[BG_GC];
			*result = p.min_segno;
			sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
			goto got_result;
		}
		if (gc_type == FG_GC &&
				sbi->next_victim_seg[FG_GC] != NULL_SEGNO) {
			p.min_segno = sbi->next_victim_seg[FG_GC];
			*result = p.min_segno;
			sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
			goto got_result;
		}
	}

	last_victim = sm->last_victim[p.gc_mode];
	if (p.alloc_mode == LFS && gc_type == FG_GC) {
		p.min_segno = check_bg_victims(sbi);
		if (p.min_segno != NULL_SEGNO)
			goto got_it;
	}

	while (1) {
		unsigned long cost;
		unsigned int segno;

		segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
		if (segno >= last_segment) {
			if (sm->last_victim[p.gc_mode]) {
				last_segment =
					sm->last_victim[p.gc_mode];
				sm->last_victim[p.gc_mode] = 0;
				p.offset = 0;
				continue;
			}
			break;
		}

		p.offset = segno + p.ofs_unit;
		if (p.ofs_unit > 1) {
			p.offset -= segno % p.ofs_unit;
			nsearched += count_bits(p.dirty_segmap,
						p.offset - p.ofs_unit,
						p.ofs_unit);
		} else {
			nsearched++;
		}

#ifdef CONFIG_F2FS_CHECK_FS
		/*
		 * Skip selecting an invalid segno (i.e. one that failed the
		 * block validity check during GC) to avoid an endless GC loop
		 * in such cases.
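		 *
		 * The bit is set in is_alive() when a summary entry points at
		 * a block whose address no longer matches its dnode.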
		 */
		if (test_bit(segno, sm->invalid_segmap))
			goto next;
#endif

		secno = GET_SEC_FROM_SEG(sbi, segno);

		if (sec_usage_check(sbi, secno))
			goto next;
		/* Don't touch checkpointed data */
		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
					get_ckpt_valid_blocks(sbi, segno) &&
					p.alloc_mode != SSR))
			goto next;
		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
			goto next;

		cost = get_gc_cost(sbi, segno, &p);

		if (p.min_cost > cost) {
			p.min_segno = segno;
			p.min_cost = cost;
		}
next:
		if (nsearched >= p.max_search) {
			if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
				sm->last_victim[p.gc_mode] = last_victim + 1;
			else
				sm->last_victim[p.gc_mode] = segno + 1;
			sm->last_victim[p.gc_mode] %=
				(MAIN_SECS(sbi) * sbi->segs_per_sec);
			break;
		}
	}
	if (p.min_segno != NULL_SEGNO) {
got_it:
		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
got_result:
		if (p.alloc_mode == LFS) {
			secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
			if (gc_type == FG_GC)
				sbi->cur_victim_sec = secno;
			else
				set_bit(secno, dirty_i->victim_secmap);
		}
	}
out:
	if (p.min_segno != NULL_SEGNO)
		trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
				sbi->cur_victim_sec,
				prefree_segments(sbi), free_segments(sbi));
	mutex_unlock(&dirty_i->seglist_lock);

	return (p.min_segno == NULL_SEGNO) ? 0 : 1;
}

static const struct victim_selection default_v_ops = {
	.get_victim = get_victim_by_default,
};

static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
	struct inode_entry *ie;

	ie = radix_tree_lookup(&gc_list->iroot, ino);
	if (ie)
		return ie->inode;
	return NULL;
}

static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
{
	struct inode_entry *new_ie;

	if (inode == find_gc_inode(gc_list, inode->i_ino)) {
		iput(inode);
		return;
	}
	new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, GFP_NOFS);
	new_ie->inode = inode;

	f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
	list_add_tail(&new_ie->list, &gc_list->ilist);
}

static void put_gc_inode(struct gc_inode_list *gc_list)
{
	struct inode_entry *ie, *next_ie;

	list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
		radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
		iput(ie->inode);
		list_del(&ie->list);
		kmem_cache_free(f2fs_inode_entry_slab, ie);
	}
}

static int check_valid_map(struct f2fs_sb_info *sbi,
				unsigned int segno, int offset)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct seg_entry *sentry;
	int ret;

	down_read(&sit_i->sentry_lock);
	sentry = get_seg_entry(sbi, segno);
	ret = f2fs_test_bit(offset, sentry->cur_valid_map);
	up_read(&sit_i->sentry_lock);
	return ret;
}

/*
 * This function compares the node address recorded in the summary with the
 * one in the NAT. If it is valid, the node is migrated with cold status;
 * otherwise (an invalid node) it is ignored.
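 *
 * The segment is scanned in three phases: phase 0 readaheads the NAT blocks,
 * phase 1 readaheads the node pages, and phase 2 moves the node blocks that
 * are still valid.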
 */
static int gc_node_segment(struct f2fs_sb_info *sbi,
		struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;
	bool fggc = (gc_type == FG_GC);
	int submitted = 0;

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	if (fggc && phase == 2)
		atomic_inc(&sbi->wb_sync_req[NODE]);

	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		nid_t nid = le32_to_cpu(entry->nid);
		struct page *node_page;
		struct node_info ni;
		int err;

		/* stop BG_GC if there are not enough free sections */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
			return submitted;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			f2fs_ra_node_page(sbi, nid);
			continue;
		}

		/* phase == 2 */
		node_page = f2fs_get_node_page(sbi, nid);
		if (IS_ERR(node_page))
			continue;

		/* block may become invalid during f2fs_get_node_page */
		if (check_valid_map(sbi, segno, off) == 0) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		if (f2fs_get_node_info(sbi, nid, &ni)) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		if (ni.blk_addr != start_addr + off) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		err = f2fs_move_node_page(node_page, gc_type);
		if (!err && gc_type == FG_GC)
			submitted++;
		stat_inc_node_blk_count(sbi, 1, gc_type);
	}

	if (++phase < 3)
		goto next_step;

	if (fggc)
		atomic_dec(&sbi->wb_sync_req[NODE]);
	return submitted;
}

/*
 * Calculate the start block index that corresponds to the given node offset.
 * Be careful: the caller must pass a node offset that refers to a direct node
 * block only. Passing an offset that points to any other node block type,
 * such as an indirect or double indirect node block, is a caller bug.
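 *
 * For example, node_ofs 1 and 2 (the two direct node blocks right behind the
 * inode) map to start indexes ADDRS_PER_INODE() and
 * ADDRS_PER_INODE() + ADDRS_PER_BLOCK(); node_ofs 4, the first direct node
 * under the first indirect node, gives bidx = 4 - 2 - 0 = 2, i.e.
 * ADDRS_PER_INODE() + 2 * ADDRS_PER_BLOCK().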
 */
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
	unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
	unsigned int bidx;

	if (node_ofs == 0)
		return 0;

	if (node_ofs <= 2) {
		bidx = node_ofs - 1;
	} else if (node_ofs <= indirect_blks) {
		int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 2 - dec;
	} else {
		int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 5 - dec;
	}
	return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
}

static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
	struct page *node_page;
	nid_t nid;
	unsigned int ofs_in_node;
	block_t source_blkaddr;

	nid = le32_to_cpu(sum->nid);
	ofs_in_node = le16_to_cpu(sum->ofs_in_node);

	node_page = f2fs_get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return false;

	if (f2fs_get_node_info(sbi, nid, dni)) {
		f2fs_put_page(node_page, 1);
		return false;
	}

	if (sum->version != dni->version) {
		f2fs_warn(sbi, "%s: valid data with mismatched node version.",
			  __func__);
		set_sbi_flag(sbi, SBI_NEED_FSCK);
	}

	*nofs = ofs_of_node(node_page);
	source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
	f2fs_put_page(node_page, 1);

	if (source_blkaddr != blkaddr) {
#ifdef CONFIG_F2FS_CHECK_FS
		unsigned int segno = GET_SEGNO(sbi, blkaddr);
		unsigned long offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

		if (unlikely(check_valid_map(sbi, segno, offset))) {
			if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
				f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
						blkaddr, source_blkaddr, segno);
				f2fs_bug_on(sbi, 1);
			}
		}
#endif
		return false;
	}
	return true;
}

static int ra_data_block(struct inode *inode, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0, 0, 0};
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.temp = COLD,
		.op = REQ_OP_READ,
		.op_flags = 0,
		.encrypted_page = NULL,
		.in_list = false,
		.retry = false,
	};
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page)
		return -ENOMEM;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ))) {
			err = -EFSCORRUPTED;
			goto put_page;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_page;
	f2fs_put_dnode(&dn);

	if (!__is_valid_data_blkaddr(dn.data_blkaddr)) {
		err = -ENOENT;
		goto put_page;
	}
	if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
						DATA_GENERIC_ENHANCE))) {
		err = -EFSCORRUPTED;
		goto put_page;
	}
got_it:
	/* read page */
	fio.page = page;
	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;

	/*
	 * Don't cache encrypted data in the meta inode until the previously
	 * dirtied data has been written back, to avoid racing between GC and
	 * flush.
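	 *
	 * The block is read into META_MAPPING here so that the later copy in
	 * move_data_block() finds it already up to date.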
	 */
	f2fs_wait_on_page_writeback(page, DATA, true, true);

	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);

	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
					dn.data_blkaddr,
					FGP_LOCK | FGP_CREAT, GFP_NOFS);
	if (!fio.encrypted_page) {
		err = -ENOMEM;
		goto put_page;
	}

	err = f2fs_submit_page_bio(&fio);
	if (err)
		goto put_encrypted_page;
	f2fs_put_page(fio.encrypted_page, 0);
	f2fs_put_page(page, 1);
	return 0;
put_encrypted_page:
	f2fs_put_page(fio.encrypted_page, 1);
put_page:
	f2fs_put_page(page, 1);
	return err;
}

/*
 * Move data block via META_MAPPING while keeping locked data page.
 * This can be used to move blocks, aka LBAs, directly on disk.
 */
static int move_data_block(struct inode *inode, block_t bidx,
				int gc_type, unsigned int segno, int off)
{
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(inode),
		.ino = inode->i_ino,
		.type = DATA,
		.temp = COLD,
		.op = REQ_OP_READ,
		.op_flags = 0,
		.encrypted_page = NULL,
		.in_list = false,
		.retry = false,
	};
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	struct page *page, *mpage;
	block_t newaddr;
	int err = 0;
	bool lfs_mode = f2fs_lfs_mode(fio.sbi);

	/* do not read out */
	page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
	if (!page)
		return -ENOMEM;

	if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
		err = -ENOENT;
		goto out;
	}

	if (f2fs_is_atomic_file(inode)) {
		F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
		F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
		err = -EAGAIN;
		goto out;
	}

	if (f2fs_is_pinned_file(inode)) {
		f2fs_pin_file_control(inode, true);
		err = -EAGAIN;
		goto out;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
	if (err)
		goto out;

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		ClearPageUptodate(page);
		err = -ENOENT;
		goto put_out;
	}

	/*
	 * Don't cache encrypted data in the meta inode until the previously
	 * dirtied data has been written back, to avoid racing between GC and
	 * flush.
	 */
	f2fs_wait_on_page_writeback(page, DATA, true, true);

	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);

	err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
	if (err)
		goto put_out;

	set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

	/* read page */
	fio.page = page;
	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;

	if (lfs_mode)
		down_write(&fio.sbi->io_order_lock);

	mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
					fio.old_blkaddr, false);
	if (!mpage)
		goto up_out;

	fio.encrypted_page = mpage;

	/* read source block in mpage */
	if (!PageUptodate(mpage)) {
		err = f2fs_submit_page_bio(&fio);
		if (err) {
			f2fs_put_page(mpage, 1);
			goto up_out;
		}
		lock_page(mpage);
		if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
						!PageUptodate(mpage))) {
			err = -EIO;
			f2fs_put_page(mpage, 1);
			goto up_out;
		}
	}

	f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
					&sum, CURSEG_COLD_DATA, NULL, false);

	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
				newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
	if (!fio.encrypted_page) {
		err = -ENOMEM;
		f2fs_put_page(mpage, 1);
		goto recover_block;
	}

	/* write target block */
	f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true);
	memcpy(page_address(fio.encrypted_page),
				page_address(mpage), PAGE_SIZE);
	f2fs_put_page(mpage, 1);
	invalidate_mapping_pages(META_MAPPING(fio.sbi),
				fio.old_blkaddr, fio.old_blkaddr);

	set_page_dirty(fio.encrypted_page);
	if (clear_page_dirty_for_io(fio.encrypted_page))
		dec_page_count(fio.sbi, F2FS_DIRTY_META);

	set_page_writeback(fio.encrypted_page);
	ClearPageError(page);

	/* allocate block address */
	f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);

	fio.op = REQ_OP_WRITE;
	fio.op_flags = REQ_SYNC;
	fio.new_blkaddr = newaddr;
	f2fs_submit_page_write(&fio);
	if (fio.retry) {
		err = -EAGAIN;
		if (PageWriteback(fio.encrypted_page))
			end_page_writeback(fio.encrypted_page);
		goto put_page_out;
	}

	f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE);

	f2fs_update_data_blkaddr(&dn, newaddr);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
put_page_out:
	f2fs_put_page(fio.encrypted_page, 1);
recover_block:
	if (err)
		f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
								true, true);
up_out:
	if (lfs_mode)
		up_write(&fio.sbi->io_order_lock);
put_out:
	f2fs_put_dnode(&dn);
out:
	f2fs_put_page(page, 1);
	return err;
}

static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
						unsigned int segno, int off)
{
	struct page *page;
	int err = 0;

	page = f2fs_get_lock_data_page(inode, bidx, true);
	if (IS_ERR(page))
		return PTR_ERR(page);

	if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
		err = -ENOENT;
		goto out;
	}

	if (f2fs_is_atomic_file(inode)) {
		F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
		F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
		err = -EAGAIN;
		goto out;
	}
	if (f2fs_is_pinned_file(inode)) {
		if (gc_type == FG_GC)
			f2fs_pin_file_control(inode, true);
		err = -EAGAIN;
		goto out;
	}

	if (gc_type == BG_GC) {
		if (PageWriteback(page)) {
			err = -EAGAIN;
			goto out;
		}
		set_page_dirty(page);
		set_cold_data(page);
	} else {
		struct f2fs_io_info fio = {
			.sbi = F2FS_I_SB(inode),
			.ino = inode->i_ino,
			.type = DATA,
			.temp = COLD,
			.op = REQ_OP_WRITE,
			.op_flags = REQ_SYNC,
			.old_blkaddr = NULL_ADDR,
			.page = page,
			.encrypted_page = NULL,
			.need_lock = LOCK_REQ,
			.io_type = FS_GC_DATA_IO,
		};
		bool is_dirty = PageDirty(page);

retry:
		f2fs_wait_on_page_writeback(page, DATA, true, true);

		set_page_dirty(page);
		if (clear_page_dirty_for_io(page)) {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}

		set_cold_data(page);

		err = f2fs_do_write_data_page(&fio);
		if (err) {
			clear_cold_data(page);
			if (err == -ENOMEM) {
				congestion_wait(BLK_RW_ASYNC,
						DEFAULT_IO_TIMEOUT);
				goto retry;
			}
			if (is_dirty)
				set_page_dirty(page);
		}
	}
out:
	f2fs_put_page(page, 1);
	return err;
}

/*
 * This function tries to get the parent node of a victim data block and
 * checks the block's validity. If the block is valid, it is copied with cold
 * status and the parent node is updated.
 * If the parent node is not valid or the data block address differs, the
 * victim data block is ignored.
 */
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
	struct super_block *sb = sbi->sb;
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;
	int submitted = 0;

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		struct page *data_page;
		struct inode *inode;
		struct node_info dni; /* dnode info for the data */
		unsigned int ofs_in_node, nofs;
		block_t start_bidx;
		nid_t nid = le32_to_cpu(entry->nid);

		/*
		 * Stop BG_GC if there are not enough free sections.
		 * Also stop GC if the segment becomes fully valid due to a
		 * race with SSR block allocation.
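		 *
		 * This check is repeated in every phase: 0 and 1 readahead the
		 * NAT and node pages, 2 readaheads the owning inodes' node
		 * pages, 3 grabs the inodes and their data pages, and 4 moves
		 * the data blocks.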
		 */
		if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
				get_valid_blocks(sbi, segno, true) ==
							BLKS_PER_SEC(sbi))
			return submitted;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			f2fs_ra_node_page(sbi, nid);
			continue;
		}

		/* Get an inode by ino while checking validity */
		if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
			continue;

		if (phase == 2) {
			f2fs_ra_node_page(sbi, dni.ino);
			continue;
		}

		ofs_in_node = le16_to_cpu(entry->ofs_in_node);

		if (phase == 3) {
			inode = f2fs_iget(sb, dni.ino);
			if (IS_ERR(inode) || is_bad_inode(inode)) {
				set_sbi_flag(sbi, SBI_NEED_FSCK);
				continue;
			}

			if (!down_write_trylock(
				&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
				iput(inode);
				sbi->skipped_gc_rwsem++;
				continue;
			}

			start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
								ofs_in_node;

			if (f2fs_post_read_required(inode)) {
				int err = ra_data_block(inode, start_bidx);

				up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				if (err) {
					iput(inode);
					continue;
				}
				add_gc_inode(gc_list, inode);
				continue;
			}

			data_page = f2fs_get_read_data_page(inode,
						start_bidx, REQ_RAHEAD, true);
			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			if (IS_ERR(data_page)) {
				iput(inode);
				continue;
			}

			f2fs_put_page(data_page, 0);
			add_gc_inode(gc_list, inode);
			continue;
		}

		/* phase 4 */
		inode = find_gc_inode(gc_list, dni.ino);
		if (inode) {
			struct f2fs_inode_info *fi = F2FS_I(inode);
			bool locked = false;
			int err;

			if (S_ISREG(inode->i_mode)) {
				if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
					continue;
				if (!down_write_trylock(
						&fi->i_gc_rwsem[WRITE])) {
					sbi->skipped_gc_rwsem++;
					up_write(&fi->i_gc_rwsem[READ]);
					continue;
				}
				locked = true;

				/* wait for all inflight aio data */
				inode_dio_wait(inode);
			}

			start_bidx = f2fs_start_bidx_of_node(nofs, inode)
								+ ofs_in_node;
			if (f2fs_post_read_required(inode))
				err = move_data_block(inode, start_bidx,
							gc_type, segno, off);
			else
				err = move_data_page(inode, start_bidx, gc_type,
								segno, off);

			if (!err && (gc_type == FG_GC ||
					f2fs_post_read_required(inode)))
				submitted++;

			if (locked) {
				up_write(&fi->i_gc_rwsem[WRITE]);
				up_write(&fi->i_gc_rwsem[READ]);
			}

			stat_inc_data_blk_count(sbi, 1, gc_type);
		}
	}

	if (++phase < 5)
		goto next_step;

	return submitted;
}

static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
			int gc_type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int ret;

	down_write(&sit_i->sentry_lock);
	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
					      NO_CHECK_TYPE, LFS);
	up_write(&sit_i->sentry_lock);
	return ret;
}

static int do_garbage_collect(struct f2fs_sb_info *sbi,
				unsigned int start_segno,
				struct gc_inode_list *gc_list, int gc_type)
{
	struct page *sum_page;
	struct f2fs_summary_block *sum;
	struct blk_plug plug;
	unsigned int segno = start_segno;
	unsigned int end_segno = start_segno + sbi->segs_per_sec;
	int seg_freed = 0, migrated = 0;
	unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
						SUM_TYPE_DATA : SUM_TYPE_NODE;
	int submitted = 0;

	if (__is_large_section(sbi))
		end_segno = rounddown(end_segno, sbi->segs_per_sec);

	/* readahead multiple SSA blocks that have contiguous addresses */
	if (__is_large_section(sbi))
		f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
					end_segno - segno, META_SSA, true);

	/* reference all summary pages */
	while (segno < end_segno) {
		sum_page = f2fs_get_sum_page(sbi, segno++);
		if (IS_ERR(sum_page)) {
			int err = PTR_ERR(sum_page);

			end_segno = segno - 1;
			for (segno = start_segno; segno < end_segno; segno++) {
				sum_page = find_get_page(META_MAPPING(sbi),
						GET_SUM_BLOCK(sbi, segno));
				f2fs_put_page(sum_page, 0);
				f2fs_put_page(sum_page, 0);
			}
			return err;
		}
		unlock_page(sum_page);
	}

	blk_start_plug(&plug);

	for (segno = start_segno; segno < end_segno; segno++) {

		/* find segment summary of victim */
		sum_page = find_get_page(META_MAPPING(sbi),
					GET_SUM_BLOCK(sbi, segno));
		f2fs_put_page(sum_page, 0);

		if (get_valid_blocks(sbi, segno, false) == 0)
			goto freed;
		if (gc_type == BG_GC && __is_large_section(sbi) &&
				migrated >= sbi->migration_granularity)
			goto skip;
		if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi)))
			goto skip;

		sum = page_address(sum_page);
		if (type != GET_SUM_TYPE((&sum->footer))) {
			f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
				 segno, type, GET_SUM_TYPE((&sum->footer)));
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			f2fs_stop_checkpoint(sbi, false);
			goto skip;
		}

		/*
		 * this is to avoid deadlock:
		 * - lock_page(sum_page)         - f2fs_replace_block
		 *  - check_valid_map()            - down_write(sentry_lock)
		 *   - down_read(sentry_lock)     - change_curseg()
		 *                                  - lock_page(sum_page)
		 */
		if (type == SUM_TYPE_NODE)
			submitted += gc_node_segment(sbi, sum->entries, segno,
								gc_type);
		else
			submitted += gc_data_segment(sbi, sum->entries, gc_list,
							segno, gc_type);

		stat_inc_seg_count(sbi, type, gc_type);
		migrated++;

freed:
		if (gc_type == FG_GC &&
				get_valid_blocks(sbi, segno, false) == 0)
			seg_freed++;

		if (__is_large_section(sbi) && segno + 1 < end_segno)
			sbi->next_victim_seg[gc_type] = segno + 1;
skip:
		f2fs_put_page(sum_page, 0);
	}

	if (submitted)
		f2fs_submit_merged_write(sbi,
				(type == SUM_TYPE_NODE) ? NODE : DATA);

	blk_finish_plug(&plug);

	stat_inc_call_count(sbi->stat_info);

	return seg_freed;
}

int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
			bool background, unsigned int segno)
{
	int gc_type = sync ? FG_GC : BG_GC;
	int sec_freed = 0, seg_freed = 0, total_freed = 0;
	int ret = 0;
	struct cp_control cpc;
	unsigned int init_segno = segno;
	struct gc_inode_list gc_list = {
		.ilist = LIST_HEAD_INIT(gc_list.ilist),
		.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
	};
	unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
	unsigned long long first_skipped;
	unsigned int skipped_round = 0, round = 0;

	trace_f2fs_gc_begin(sbi->sb, sync, background,
				get_pages(sbi, F2FS_DIRTY_NODES),
				get_pages(sbi, F2FS_DIRTY_DENTS),
				get_pages(sbi, F2FS_DIRTY_IMETA),
				free_sections(sbi),
				free_segments(sbi),
				reserved_segments(sbi),
				prefree_segments(sbi));

	cpc.reason = __get_cp_reason(sbi);
	sbi->skipped_gc_rwsem = 0;
	first_skipped = last_skipped;
gc_more:
	if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
		ret = -EINVAL;
		goto stop;
	}
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto stop;
	}

	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
		/*
		 * For example, if there are many prefree_segments below given
		 * threshold, we can make them free by checkpoint. Then, we
		 * secure free segments which don't need FG_GC any more.
		 */
		if (prefree_segments(sbi) &&
				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
			ret = f2fs_write_checkpoint(sbi, &cpc);
			if (ret)
				goto stop;
		}
		if (has_not_enough_free_secs(sbi, 0, 0))
			gc_type = FG_GC;
	}

	/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
	if (gc_type == BG_GC && !background) {
		ret = -EINVAL;
		goto stop;
	}
	if (!__get_victim(sbi, &segno, gc_type)) {
		ret = -ENODATA;
		goto stop;
	}

	seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
	if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
		sec_freed++;
	total_freed += seg_freed;

	if (gc_type == FG_GC) {
		if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
						sbi->skipped_gc_rwsem)
			skipped_round++;
		last_skipped = sbi->skipped_atomic_files[FG_GC];
		round++;
	}

	if (gc_type == FG_GC && seg_freed)
		sbi->cur_victim_sec = NULL_SEGNO;

	if (sync)
		goto stop;

	if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
		if (skipped_round <= MAX_SKIP_GC_COUNT ||
					skipped_round * 2 < round) {
			segno = NULL_SEGNO;
			goto gc_more;
		}

		if (first_skipped < last_skipped &&
				(last_skipped - first_skipped) >
						sbi->skipped_gc_rwsem) {
			f2fs_drop_inmem_pages_all(sbi, true);
			segno = NULL_SEGNO;
			goto gc_more;
		}
		if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
			ret = f2fs_write_checkpoint(sbi, &cpc);
	}
stop:
	SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
	SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;

	trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed,
				get_pages(sbi, F2FS_DIRTY_NODES),
				get_pages(sbi, F2FS_DIRTY_DENTS),
				get_pages(sbi, F2FS_DIRTY_IMETA),
				free_sections(sbi),
				free_segments(sbi),
				reserved_segments(sbi),
				prefree_segments(sbi));

	up_write(&sbi->gc_lock);

	put_gc_inode(&gc_list);

	if (sync && !ret)
		ret = sec_freed ? 0 : -EAGAIN;
	return ret;
}

void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
	DIRTY_I(sbi)->v_ops = &default_v_ops;

	sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;

	/* give warm/cold data area from slower device */
	if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
		SIT_I(sbi)->last_victim[ALLOC_NEXT] =
				GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}

static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
							unsigned int end)
{
	int type;
	unsigned int segno, next_inuse;
	int err = 0;

	/* Move out cursegs from the target range */
	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
		allocate_segment_for_resize(sbi, type, start, end);

	/* do GC to move out valid blocks in the range */
	for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
		struct gc_inode_list gc_list = {
			.ilist = LIST_HEAD_INIT(gc_list.ilist),
			.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
		};

		down_write(&sbi->gc_lock);
		do_garbage_collect(sbi, segno, &gc_list, FG_GC);
		up_write(&sbi->gc_lock);
		put_gc_inode(&gc_list);

		if (get_valid_blocks(sbi, segno, true))
			return -EAGAIN;
	}

	err = f2fs_sync_fs(sbi->sb, 1);
	if (err)
		return err;

	next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
	if (next_inuse <= end) {
		f2fs_err(sbi, "segno %u should be free but still inuse!",
			 next_inuse);
		f2fs_bug_on(sbi, 1);
	}
	return err;
}

static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
{
	struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
	int section_count;
	int segment_count;
	int segment_count_main;
	long long block_count;
	int segs = secs * sbi->segs_per_sec;

	down_write(&sbi->sb_lock);

	section_count = le32_to_cpu(raw_sb->section_count);
	segment_count = le32_to_cpu(raw_sb->segment_count);
	segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
	block_count = le64_to_cpu(raw_sb->block_count);

	raw_sb->section_count = cpu_to_le32(section_count + secs);
	raw_sb->segment_count = cpu_to_le32(segment_count + segs);
	raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
	raw_sb->block_count = cpu_to_le64(block_count +
					(long long)segs * sbi->blocks_per_seg);
	if (f2fs_is_multi_device(sbi)) {
		int last_dev = sbi->s_ndevs - 1;
		int dev_segs =
			le32_to_cpu(raw_sb->devs[last_dev].total_segments);

		raw_sb->devs[last_dev].total_segments =
						cpu_to_le32(dev_segs + segs);
	}

	up_write(&sbi->sb_lock);
}

static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
{
	int segs = secs * sbi->segs_per_sec;
	long long blks = (long long)segs * sbi->blocks_per_seg;
	long long user_block_count =
			le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);

	SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
	MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
	FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
	FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
	F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);

	if (f2fs_is_multi_device(sbi)) {
		int last_dev = sbi->s_ndevs - 1;

		FDEV(last_dev).total_segments =
				(int)FDEV(last_dev).total_segments + segs;
		FDEV(last_dev).end_blk =
				(long long)FDEV(last_dev).end_blk + blks;
#ifdef CONFIG_BLK_DEV_ZONED
		FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz +
					(int)(blks >> sbi->log_blocks_per_blkz);
#endif
	}
}

int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
{
	__u64 old_block_count, shrunk_blocks;
	unsigned int secs;
	int gc_mode, gc_type;
	int err = 0;
	__u32 rem;

	old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
	if (block_count > old_block_count)
		return -EINVAL;

	if (f2fs_is_multi_device(sbi)) {
		int last_dev = sbi->s_ndevs - 1;
		__u64 last_segs = FDEV(last_dev).total_segments;

		if (block_count + last_segs * sbi->blocks_per_seg <=
								old_block_count)
			return -EINVAL;
	}

	/* new fs size should align to section size */
	div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem);
	if (rem)
		return -EINVAL;

	if (block_count == old_block_count)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
		f2fs_err(sbi, "Should run fsck to repair first.");
		return -EFSCORRUPTED;
	}

	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
		f2fs_err(sbi, "Checkpoint should be enabled.");
		return -EINVAL;
	}

	freeze_bdev(sbi->sb->s_bdev);

	shrunk_blocks = old_block_count - block_count;
	secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
	spin_lock(&sbi->stat_lock);
	if (shrunk_blocks + valid_user_blocks(sbi) +
		sbi->current_reserved_blocks + sbi->unusable_block_count +
		F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
		err = -ENOSPC;
	else
		sbi->user_block_count -= shrunk_blocks;
	spin_unlock(&sbi->stat_lock);
	if (err) {
		thaw_bdev(sbi->sb->s_bdev, sbi->sb);
		return err;
	}

	mutex_lock(&sbi->resize_mutex);
	set_sbi_flag(sbi, SBI_IS_RESIZEFS);

	mutex_lock(&DIRTY_I(sbi)->seglist_lock);

	MAIN_SECS(sbi) -= secs;

	for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
		if (SIT_I(sbi)->last_victim[gc_mode] >=
					MAIN_SECS(sbi) * sbi->segs_per_sec)
			SIT_I(sbi)->last_victim[gc_mode] = 0;

	for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
		if (sbi->next_victim_seg[gc_type] >=
					MAIN_SECS(sbi) * sbi->segs_per_sec)
			sbi->next_victim_seg[gc_type] = NULL_SEGNO;

	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);

	err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec,
			MAIN_SEGS(sbi) - 1);
	if (err)
		goto out;

	update_sb_metadata(sbi, -secs);

	err = f2fs_commit_super(sbi, false);
	if (err) {
		update_sb_metadata(sbi, secs);
		goto out;
	}

	mutex_lock(&sbi->cp_mutex);
	update_fs_metadata(sbi, -secs);
	clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
	set_sbi_flag(sbi, SBI_IS_DIRTY);
	mutex_unlock(&sbi->cp_mutex);

	err = f2fs_sync_fs(sbi->sb, 1);
	if (err) {
		mutex_lock(&sbi->cp_mutex);
		update_fs_metadata(sbi, secs);
		mutex_unlock(&sbi->cp_mutex);
		update_sb_metadata(sbi, secs);
		f2fs_commit_super(sbi, false);
	}
out:
	if (err) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");

		MAIN_SECS(sbi) += secs;
		spin_lock(&sbi->stat_lock);
		sbi->user_block_count += shrunk_blocks;
		spin_unlock(&sbi->stat_lock);
	}
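
	/* resizing has finished or been rolled back; resume normal operation */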
	clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
	mutex_unlock(&sbi->resize_mutex);
	thaw_bdev(sbi->sb->s_bdev, sbi->sb);
	return err;
}