// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *revoke_entry_slab;

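/*
 * Load an unsigned long from @str most-significant byte first, giving
 * the reversed-bit helpers below a byte-order-independent value to scan.
 */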
static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (f2fs_lfs_mode(sbi))
		return false;
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

void f2fs_abort_atomic_write(struct inode *inode, bool clean)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	if (!f2fs_is_atomic_file(inode))
		return;

	if (clean)
		truncate_inode_pages_final(inode->i_mapping);
	clear_inode_flag(fi->cow_inode, FI_COW_FILE);
	iput(fi->cow_inode);
	fi->cow_inode = NULL;
	release_atomic_write_cnt(inode);
	clear_inode_flag(inode, FI_ATOMIC_FILE);
	stat_dec_atomic_inode(inode);
}

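/*
 * Point @index of @inode at @new_addr.  On commit (@recover == false)
 * the previous block address is returned through @old_addr so the
 * change can be revoked later; on revoke (@recover == true) @new_addr
 * is that saved address being restored.
 */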
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
			block_t new_addr, block_t *old_addr, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct node_info ni;
	int err;

retry:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE_RA);
	if (err) {
		if (err == -ENOMEM) {
			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
			goto retry;
		}
		return err;
	}

	err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	if (recover) {
		/* dn.data_blkaddr is always valid */
		if (!__is_valid_data_blkaddr(new_addr)) {
			if (new_addr == NULL_ADDR)
				dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
			f2fs_update_data_blkaddr(&dn, new_addr);
		} else {
			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
				new_addr, ni.version, true, true);
		}
	} else {
		blkcnt_t count = 1;

		*old_addr = dn.data_blkaddr;
		f2fs_truncate_data_blocks_range(&dn, 1);
		dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
		inc_valid_block_count(sbi, inode, &count);
		f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
					ni.version, true, false);
	}

	f2fs_put_dnode(&dn);
	return 0;
}

static void __complete_revoke_list(struct inode *inode, struct list_head *head,
					bool revoke)
{
	struct revoke_entry *cur, *tmp;

	list_for_each_entry_safe(cur, tmp, head, list) {
		if (revoke)
			__replace_atomic_write_block(inode, cur->index,
						cur->old_addr, NULL, true);
		list_del(&cur->list);
		kmem_cache_free(revoke_entry_slab, cur);
	}
}

static int __f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inode *cow_inode = fi->cow_inode;
	struct revoke_entry *new;
	struct list_head revoke_list;
	block_t blkaddr;
	struct dnode_of_data dn;
	pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	pgoff_t off = 0, blen, index;
	int ret = 0, i;

	INIT_LIST_HEAD(&revoke_list);

	while (len) {
		blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);

		set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
		if (ret && ret != -ENOENT) {
			goto out;
		} else if (ret == -ENOENT) {
			ret = 0;
			if (dn.max_level == 0)
				goto out;
			goto next;
		}

		blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
				len);
		index = off;
		for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
			blkaddr = f2fs_data_blkaddr(&dn);

			if (!__is_valid_data_blkaddr(blkaddr)) {
				continue;
			} else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
					DATA_GENERIC_ENHANCE)) {
				f2fs_put_dnode(&dn);
				ret = -EFSCORRUPTED;
				f2fs_handle_error(sbi,
						ERROR_INVALID_BLKADDR);
				goto out;
			}

			new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
							true, NULL);

			ret = __replace_atomic_write_block(inode, index, blkaddr,
							&new->old_addr, false);
			if (ret) {
				f2fs_put_dnode(&dn);
				kmem_cache_free(revoke_entry_slab, new);
				goto out;
			}

			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			new->index = index;
			list_add_tail(&new->list, &revoke_list);
		}
		f2fs_put_dnode(&dn);
next:
		off += blen;
		len -= blen;
	}

out:
	if (ret)
		sbi->revoked_atomic_block += fi->atomic_write_cnt;
	else
		sbi->committed_atomic_block += fi->atomic_write_cnt;

	__complete_revoke_list(inode, &revoke_list, ret ? true : false);

	return ret;
}

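/*
 * Write back the inode's dirty pages and move the atomically written
 * blocks from the cow inode into place, under the inode's GC lock and
 * f2fs_lock_op().
 */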
int f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (err)
		return err;

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	f2fs_lock_op(sbi);

	err = __f2fs_commit_atomic_write(inode);

	f2fs_unlock_op(sbi);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
		f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
	}

	/* balance_fs_bg is able to be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi, false);

	if (!f2fs_is_checkpoint_ready(sbi))
		return;

	/*
	 * We should do GC or end up with a checkpoint if there are too many
	 * dirty dir/node pages and not enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0, 0)) {
		if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
					sbi->gc_thread->f2fs_gc_task) {
			DEFINE_WAIT(wait);

			prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
						TASK_UNINTERRUPTIBLE);
			wake_up(&sbi->gc_thread->gc_wait_queue_head);
			io_schedule();
			finish_wait(&sbi->gc_thread->fggc_wq, &wait);
		} else {
			struct f2fs_gc_control gc_control = {
				.victim_segno = NULL_SEGNO,
				.init_gc_type = BG_GC,
				.no_bg_gc = true,
				.should_migrate_blocks = false,
				.err_gc_skipped = false,
				.nr_free_secs = 1 };
			f2fs_down_write(&sbi->gc_lock);
			f2fs_gc(sbi, &gc_control);
		}
	}
}

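/*
 * Return true when any single class of dirty pages, or their sum,
 * exceeds the dirty threshold; f2fs_balance_fs_bg() then forces a
 * checkpoint.
 */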
static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
{
	int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
	unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
	unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
	unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
	unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
	unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
	unsigned int threshold = sbi->blocks_per_seg * factor *
					DEFAULT_DIRTY_THRESHOLD;
	unsigned int global_threshold = threshold * 3 / 2;

	if (dents >= threshold || qdata >= threshold ||
		nodes >= threshold || meta >= threshold ||
		imeta >= threshold)
		return true;
	return dents + qdata + nodes + meta + imeta > global_threshold;
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
		excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
		goto do_sync;

	/* there is background inflight IO or a recent foreground operation */
	if (is_inflight_io(sbi, REQ_TIME) ||
		(!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
		return;

	/* exceed periodical checkpoint timeout threshold */
	if (f2fs_time_over(sbi, CP_TIME))
		goto do_sync;

	/* checkpoint is the only way to shrink partial cached entries */
	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
		f2fs_available_free_memory(sbi, INO_ENTRIES))
		return;

do_sync:
	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
		struct blk_plug plug;

		mutex_lock(&sbi->flush_lock);

		blk_start_plug(&plug);
		f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
		blk_finish_plug(&plug);

		mutex_unlock(&sbi->flush_lock);
	}
	f2fs_sync_fs(sbi->sb, 1);
	stat_inc_bg_cp_count(sbi->stat_info);
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	int ret = blkdev_issue_flush(bdev);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!f2fs_is_multi_device(sbi))
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

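/*
 * Flush merge thread: batch every queued flush request and complete
 * them all with the result of a single device flush.
 */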
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
			f2fs_is_multi_device(sbi)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/*
	 * update issue_list before we wake up issue_flush thread, this
	 * smp_mb() pairs with another barrier in ___wait_event(), see
	 * more details in comments of waitqueue_active().
	 */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return err;
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->queued_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	if (!test_opt(sbi, FLUSH_MERGE))
		return err;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
		return err;
	}

	return err;
}

void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

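/*
 * Flush the write cache of every dirty device except the first; stop
 * checkpointing if a device keeps failing to flush.
 */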
int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		int count = DEFAULT_RETRY_IO_COUNT;

		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;

		do {
			ret = __submit_flush_wait(sbi, FDEV(i).bdev);
			if (ret)
				f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
		} while (ret && --count);

		if (ret) {
			f2fs_stop_checkpoint(sbi, false,
					STOP_CP_REASON_FLUSH_FAIL);
			break;
		}

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;

		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
			block_t valid_blocks =
				get_valid_blocks(sbi, segno, true);

			f2fs_bug_on(sbi, unlikely(!valid_blocks ||
					valid_blocks == CAP_BLKS_PER_SEC(sbi)));

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

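/*
 * Remove @segno from the dirty list of @dirty_type and keep the
 * per-type counters, victim map and section dirty map consistent.
 */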
static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t valid_blocks;

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		valid_blocks = get_valid_blocks(sbi, segno, true);
		if (valid_blocks == 0) {
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
		}
		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);

			if (!valid_blocks ||
					valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
				clear_bit(secno, dirty_i->dirty_secmap);
				return;
			}

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

/*
 * Errors such as -ENOMEM should not occur here, since adding a dirty
 * entry to the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;
	unsigned int usable_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);

	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
		ckpt_valid_blocks == usable_blocks)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < usable_blocks) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

/* Move dirty segments that no longer contain valid blocks to the prefree list. */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}

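/*
 * Return the number of blocks that cannot be used while checkpointing
 * is disabled: the larger of the DATA and NODE holes in dirty segments,
 * minus what the overprovision area covers.
 */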
block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	block_t unusable;
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
		else
			holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
	if (unusable > ovp_holes)
		return unusable - ovp_holes;
	return 0;
}

int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	if (unusable > F2FS_OPTION(sbi).unusable_cap)
		return -EAGAIN;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
		dirty_segments(sbi) > ovp_hole_segs)
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno, false))
			continue;
		mutex_unlock(&dirty_i->seglist_lock);
		return segno;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return NULL_SEGNO;
}

static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->lstart = lstart;
	dc->start = start;
	dc->len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->queued = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}

static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);

	return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_sub(dc->queued, &dcc->queued_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

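/*
 * Free a discard command once it has no in-flight bios, logging the
 * failure unless the device simply does not support discard.
 */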
static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		printk_ratelimited(
			"%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
			KERN_INFO, sbi->sb->s_id,
			dc->lstart, dc->start, dc->len, dc->error);
	__detach_discard_cmd(dcc, dc);
}

static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&dc->lock, flags);
	if (!dc->error)
		dc->error = blk_status_to_errno(bio->bi_status);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
	bio_put(bio);
}

static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct seg_entry *sentry;
	unsigned int segno;
	block_t blk = start;
	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
	unsigned long *map;

	while (blk < end) {
		segno = GET_SEGNO(sbi, blk);
		sentry = get_seg_entry(sbi, segno);
		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

		if (end < START_BLOCK(sbi, segno + 1))
			size = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			size = max_blocks;
		map = (unsigned long *)(sentry->cur_valid_map);
		offset = __find_rev_next_bit(map, size, offset);
		f2fs_bug_on(sbi, offset != size);
		blk = START_BLOCK(sbi, segno + 1);
	}
#endif
}

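/*
 * Initialize a discard policy for the given issue path (background,
 * force, fstrim or umount): request limits, issue intervals,
 * granularity and io-awareness.
 */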
static void __init_discard_policy(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				int discard_type, unsigned int granularity)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	/* common policy */
	dpolicy->type = discard_type;
	dpolicy->sync = true;
	dpolicy->ordered = false;
	dpolicy->granularity = granularity;

	dpolicy->max_requests = dcc->max_discard_request;
	dpolicy->io_aware_gran = MAX_PLIST_NUM;
	dpolicy->timeout = false;

	if (discard_type == DPOLICY_BG) {
		dpolicy->min_interval = dcc->min_discard_issue_time;
		dpolicy->mid_interval = dcc->mid_discard_issue_time;
		dpolicy->max_interval = dcc->max_discard_issue_time;
		dpolicy->io_aware = true;
		dpolicy->sync = false;
		dpolicy->ordered = true;
		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
			dpolicy->granularity = 1;
			if (atomic_read(&dcc->discard_cmd_cnt))
				dpolicy->max_interval =
					dcc->min_discard_issue_time;
		}
	} else if (discard_type == DPOLICY_FORCE) {
		dpolicy->min_interval = dcc->min_discard_issue_time;
		dpolicy->mid_interval = dcc->mid_discard_issue_time;
		dpolicy->max_interval = dcc->max_discard_issue_time;
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_FSTRIM) {
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_UMOUNT) {
		dpolicy->io_aware = false;
		/* we need to issue all to keep CP_TRIMMED_FLAG */
		dpolicy->granularity = 1;
		dpolicy->timeout = true;
	}
}

static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len);
/* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				struct discard_cmd *dc,
				unsigned int *issued)
{
	struct block_device *bdev = dc->bdev;
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
	block_t lstart, start, len, total_len;
	int err = 0;

	if (dc->state != D_PREP)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return 0;

	trace_f2fs_issue_discard(bdev, dc->start, dc->len);

	lstart = dc->lstart;
	start = dc->start;
	len = dc->len;
	total_len = len;

	dc->len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->len += len;

		if (time_to_inject(sbi, FAULT_DISCARD)) {
			f2fs_show_injection_info(sbi, FAULT_DISCARD);
			err = -EIO;
			goto submit;
		}
		err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, &bio);
submit:
		if (err) {
			spin_lock_irqsave(&dc->lock, flags);
			if (dc->state == D_PARTIAL)
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

			break;
		}

		f2fs_bug_on(sbi, !bio);

		/*
		 * should be set before submission to avoid reaching D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);

		atomic_inc(&dcc->queued_discard);
		dc->queued++;
		list_move_tail(&dc->list, wait_list);

		/* sanity check on discard range */
		__check_sit_bitmap(sbi, lstart, lstart + len);

		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, NULL, FS_DISCARD, 1);

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
	}

	if (!err && len) {
		dcc->undiscard_blks -= len;
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
	}
	return err;
}

static void __insert_discard_tree(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
							lstart, &leftmost);
do_insert:
	__attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
								p, leftmost);
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

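/*
 * Carve @blkaddr out of a pending discard command, trimming the
 * command or splitting it in two as needed.
 */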
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool modified = false;

	if (dc->state == D_DONE || dc->len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	dcc->undiscard_blks -= di.len;

	if (blkaddr > di.lstart) {
		dc->len = blkaddr - dc->lstart;
		dcc->undiscard_blks += dc->len;
		__relocate_discard_cmd(dcc, dc);
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
		} else {
			dc->lstart++;
			dc->len--;
			dc->start++;
			dcc->undiscard_blks += dc->len;
			__relocate_discard_cmd(dcc, dc);
		}
	}
}

static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
	block_t end = lstart + len;

	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
		}

		if (!merged) {
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
		}
next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

	if (!f2fs_bdev_support_discard(bdev))
		return 0;

	trace_f2fs_queue_discard(bdev, blkstart, blklen);

	if (f2fs_is_multi_device(sbi)) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
	return 0;
}

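/*
 * Issue pending discard commands in LBA order, starting from the
 * position (dcc->next_pos) left by the previous pass; back off when
 * the device is busy and the policy is io_aware.
 */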
static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	unsigned int pos = dcc->next_pos;
	unsigned int issued = 0;
	bool io_interrupted = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, pos,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
		int err = 0;

		if (dc->state != D_PREP)
			goto next;

		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->lstart + dc->len;
		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);

		if (issued >= dpolicy->max_requests)
			break;
next:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy);

static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
	int i, issued;
	bool io_interrupted = false;

	if (dpolicy->timeout)
		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);

retry:
	issued = 0;
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
			break;

		if (i + 1 < dpolicy->granularity)
			break;

		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
			return __issue_discard_cmd_orderly(sbi, dpolicy);

		pend_list = &dcc->pend_list[i];

		mutex_lock(&dcc->cmd_lock);
		if (list_empty(pend_list))
			goto next;
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
							&dcc->root, false));
		blk_start_plug(&plug);
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

			if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
				break;

			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
						!is_idle(sbi, DISCARD_TIME)) {
				io_interrupted = true;
				break;
			}

			__submit_discard_cmd(sbi, dpolicy, dc, &issued);

			if (issued >= dpolicy->max_requests)
				break;
		}
		blk_finish_plug(&plug);
next:
		mutex_unlock(&dcc->cmd_lock);

		if (issued >= dpolicy->max_requests || io_interrupted)
			break;
	}

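	/* on umount, keep issuing until no pending discard commands remain */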
	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
		__wait_all_discard_cmd(sbi, dpolicy);
		goto retry;
	}

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
	bool dropped = false;

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
			dropped = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	return dropped;
}

void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	__drop_discard_cmd(sbi);
}

static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned int len = 0;

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
	if (!dc->ref) {
		if (!dc->error)
			len = dc->len;
		__remove_discard_cmd(sbi, dc);
	}
	mutex_unlock(&dcc->cmd_lock);

	return len;
}

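/*
 * Wait for issued discard commands that overlap [@start, @end) and
 * return the number of blocks successfully trimmed.
 */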
static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						block_t start, block_t end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	struct discard_cmd *dc = NULL, *iter, *tmp;
	unsigned int trimmed = 0;

next:
	dc = NULL;

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(iter, tmp, wait_list, list) {
		if (iter->lstart + iter->len <= start || end <= iter->lstart)
			continue;
		if (iter->len < dpolicy->granularity)
			continue;
		if (iter->state == D_DONE && !iter->ref) {
			wait_for_completion_io(&iter->wait);
			if (!iter->error)
				trimmed += iter->len;
			__remove_discard_cmd(sbi, iter);
		} else {
			iter->ref++;
			dc = iter;
			break;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (dc) {
		trimmed += __wait_one_discard_bio(sbi, dc);
		goto next;
	}

	return trimmed;
}

static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy)
{
	struct discard_policy dp;
	unsigned int discard_blks;

	if (dpolicy)
		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);

	/* wait all */
	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);

	return discard_blks;
}

/* This should be covered by global mutex, &sit_i->sentry_lock */
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
	bool need_wait = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
							NULL, blkaddr);
	if (dc) {
		if (dc->state == D_PREP) {
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (need_wait)
		__wait_one_discard_bio(sbi, dc);
}

void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dcc && dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
	}
}

/* This comes from f2fs_put_super */
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_policy dpolicy;
	bool dropped;

	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
					dcc->discard_granularity);
	__issue_discard_cmd(sbi, &dpolicy);
	dropped = __drop_discard_cmd(sbi);

	/* just to make sure there are no pending discard commands */
	__wait_all_discard_cmd(sbi, NULL);

	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
	return dropped;
}

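/*
 * Background discard thread: pick a policy based on GC urgency and
 * memory pressure, issue pending discard commands, and adjust the
 * sleep interval to how much work was done.
 */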
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
	struct discard_policy dpolicy;
	unsigned int wait_ms = dcc->min_discard_issue_time;
	int issued;

	set_freezable();

	do {
		if (sbi->gc_mode == GC_URGENT_HIGH ||
			!f2fs_available_free_memory(sbi, DISCARD_CACHE))
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
		else
			__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
						dcc->discard_granularity);

		if (!atomic_read(&dcc->discard_cmd_cnt))
			wait_ms = dpolicy.max_interval;

		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));

		if (dcc->discard_wake)
			dcc->discard_wake = 0;

		/* clean up pending candidates before going to sleep */
		if (atomic_read(&dcc->queued_discard))
			__wait_all_discard_cmd(sbi, NULL);

		if (try_to_freeze())
			continue;
		if (f2fs_readonly(sbi->sb))
			continue;
		if (kthread_should_stop())
			return 0;
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}
		if (!atomic_read(&dcc->discard_cmd_cnt))
			continue;

		sb_start_intwrite(sbi->sb);

		issued = __issue_discard_cmd(sbi, &dpolicy);
		if (issued > 0) {
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
		} else if (issued == -1) {
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
				wait_ms = dpolicy.mid_interval;
		} else {
			wait_ms = dpolicy.max_interval;
		}

		sb_end_intwrite(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	sector_t sector, nr_sects;
	block_t lblkstart = blkstart;
	int devi = 0;

	if (f2fs_is_multi_device(sbi)) {
		devi = f2fs_target_device_index(sbi, blkstart);
		if (blkstart < FDEV(devi).start_blk ||
		    blkstart > FDEV(devi).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkstart);
			return -EIO;
		}
		blkstart -= FDEV(devi).start_blk;
	}

	/* For sequential zones, reset the zone write pointer */
	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
				 devi, sbi->s_ndevs ? FDEV(devi).path : "",
				 blkstart, blklen);
			return -EIO;
		}
		trace_f2fs_issue_reset_zone(bdev, blkstart);
		return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
					sector, nr_sects, GFP_NOFS);
	}

	/* For conventional zones, use regular discard if supported */
	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
}
#endif

static int __issue_discard_async(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
}

static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
				block_t blkstart, block_t blklen)
{
	sector_t start = blkstart, len = 0;
	struct block_device *bdev;
	struct seg_entry *se;
	unsigned int offset;
	block_t i;
	int err = 0;

	bdev = f2fs_target_device(sbi, blkstart, NULL);

	for (i = blkstart; i < blkstart + blklen; i++, len++) {
		if (i != start) {
			struct block_device *bdev2 =
				f2fs_target_device(sbi, i, NULL);

			if (bdev2 != bdev) {
				err = __issue_discard_async(sbi, bdev,
						start, len);
				if (err)
					return err;
				bdev = bdev2;
				start = i;
				len = 0;
			}
		}

		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (f2fs_block_unit_discard(sbi) &&
				!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}

	if (len)
		err = __issue_discard_async(sbi, bdev, start, len);
	return err;
}

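/*
 * Collect discardable block ranges of the segment at @cpc->trim_start
 * into the discard entry list; with @check_only, just report whether
 * any candidate exists.
 */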
static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason & CP_DISCARD);
	struct discard_entry *de = NULL;
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
	int i;

	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
			!f2fs_block_unit_discard(sbi))
		return false;

	if (!force) {
		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
			return false;
	}

	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		if (force && start && end != max_blocks
					&& (end - start) < cpc->trim_minlen)
			continue;

		if (check_only)
			return true;

		if (!de) {
			de = f2fs_kmem_cache_alloc(discard_entry_slab,
						GFP_F2FS_ZERO, true, NULL);
			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
			list_add_tail(&de->list, head);
		}

		for (i = start; i < end; i++)
			__set_bit_le(i, (void *)de->discard_map);

		SM_I(sbi)->dcc_info->nr_discards += end - start;
	}
	return false;
}

static void release_discard_addr(struct discard_entry *entry)
{
	list_del(&entry->list);
	kmem_cache_free(discard_entry_slab, entry);
}

void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
	struct discard_entry *entry, *this;

	/* drop caches */
	list_for_each_entry_safe(entry, this, head, list)
		release_discard_addr(entry);
}

/*
 * Should call f2fs_clear_prefree_segments after checkpoint is done.
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
		__set_test_and_free(sbi, segno, false);
	mutex_unlock(&dirty_i->seglist_lock);
}

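/*
 * Clear the prefree segments collected during this checkpoint and issue
 * discards for them; in LFS mode with large sections only whole free
 * sections are discarded, and the small per-block discards gathered by
 * add_discard_addrs() are sent at the end.
 */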
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *head = &dcc->entry_list;
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;
	unsigned int secno, start_segno;
	bool force = (cpc->reason & CP_DISCARD);
	bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
						DISCARD_UNIT_SECTION;

	if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
		section_alignment = true;

	mutex_lock(&dirty_i->seglist_lock);

	while (1) {
		int i;

		if (section_alignment && end != -1)
			end--;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		if (section_alignment) {
			start = rounddown(start, sbi->segs_per_sec);
			end = roundup(end, sbi->segs_per_sec);
		}

		for (i = start; i < end; i++) {
			if (test_and_clear_bit(i, prefree_map))
				dirty_i->nr_dirty[PRE]--;
		}

		if (!f2fs_realtime_discard_enable(sbi))
			continue;

		if (force && start >= cpc->trim_start &&
					(end - 1) <= cpc->trim_end)
			continue;

		if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
			continue;
		}
next:
		secno = GET_SEC_FROM_SEG(sbi, start);
		start_segno = GET_SEG_FROM_SEC(sbi, secno);
		if (!IS_CURSEC(sbi, secno) &&
			!get_valid_blocks(sbi, start, true))
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
				sbi->segs_per_sec << sbi->log_blocks_per_seg);

		start = start_segno + sbi->segs_per_sec;
		if (start < end)
			goto next;
		else
			end = start - 1;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	if (!f2fs_block_unit_discard(sbi))
		goto wakeup;

	/* send small discards */
	list_for_each_entry_safe(entry, this, head, list) {
		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
		bool is_valid = test_bit_le(0, entry->discard_map);

find_next:
		if (is_valid) {
			next_pos = find_next_zero_bit_le(entry->discard_map,
						sbi->blocks_per_seg, cur_pos);
			len = next_pos - cur_pos;

			if (f2fs_sb_has_blkzoned(sbi) ||
			    (force && len < cpc->trim_minlen))
				goto skip;

			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
									len);
			total_len += len;
		} else {
			next_pos = find_next_bit_le(entry->discard_map,
						sbi->blocks_per_seg, cur_pos);
		}
skip:
		cur_pos = next_pos;
		is_valid = !is_valid;

		if (cur_pos < sbi->blocks_per_seg)
			goto find_next;

		release_discard_addr(entry);
		dcc->nr_discards -= total_len;
	}

wakeup:
	wake_up_discard_thread(sbi, false);
}

int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	int err = 0;

	if (!f2fs_realtime_discard_enable(sbi))
		return 0;

	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(dcc->f2fs_issue_discard))
		err = PTR_ERR(dcc->f2fs_issue_discard);

	return err;
}

static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc;
	int err = 0, i;

	if (SM_I(sbi)->dcc_info) {
		dcc = SM_I(sbi)->dcc_info;
		goto init_thread;
	}

	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
	if (!dcc)
		return -ENOMEM;

	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
	if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
		dcc->discard_granularity = sbi->blocks_per_seg;
	else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
		dcc->discard_granularity = BLKS_PER_SEC(sbi);

	INIT_LIST_HEAD(&dcc->entry_list);
	for (i = 0; i < MAX_PLIST_NUM; i++)
		INIT_LIST_HEAD(&dcc->pend_list[i]);
	INIT_LIST_HEAD(&dcc->wait_list);
	INIT_LIST_HEAD(&dcc->fstrim_list);
	mutex_init(&dcc->cmd_lock);
	atomic_set(&dcc->issued_discard, 0);
	atomic_set(&dcc->queued_discard, 0);
	atomic_set(&dcc->discard_cmd_cnt, 0);
	dcc->nr_discards = 0;
	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
	dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
	dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
	dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
	dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
	dcc->undiscard_blks = 0;
	dcc->next_pos = 0;
	dcc->root = RB_ROOT_CACHED;
	dcc->rbtree_check = false;

	init_waitqueue_head(&dcc->discard_wait_queue);
	SM_I(sbi)->dcc_info = dcc;
init_thread:
	err = f2fs_start_discard_thread(sbi);
	if (err) {
		kfree(dcc);
		SM_I(sbi)->dcc_info = NULL;
	}

	return err;
}

static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (!dcc)
		return;

	f2fs_stop_discard_thread(sbi);

	/*
	 * Recovery can cache discard commands, so in the error path of
	 * fill_super(), we need to give them a chance to be handled.
	 */
	if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
		f2fs_issue_discard_timeout(sbi);

	kfree(dcc);
	SM_I(sbi)->dcc_info = NULL;
}

static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);

	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
								block_t blkaddr)
{
	unsigned int segno = GET_SEGNO(sbi, blkaddr);

	if (segno == NULL_SEGNO)
		return 0;
	return get_seg_entry(sbi, segno)->mtime;
}

static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
						unsigned long long old_mtime)
{
	struct seg_entry *se;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned long long ctime = get_mtime(sbi, false);
	unsigned long long mtime = old_mtime ? old_mtime : ctime;

	if (segno == NULL_SEGNO)
		return;

	se = get_seg_entry(sbi, segno);

	if (!se->mtime)
		se->mtime = mtime;
	else
		se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
						se->valid_blocks + 1);

	if (ctime > SIT_I(sbi)->max_mtime)
		SIT_I(sbi)->max_mtime = ctime;
}

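/*
 * Apply a valid-block delta (@del is +1 or -1) for @blkaddr to its SIT
 * entry and keep the valid, checkpoint and discard bitmaps consistent.
 */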
kfree(dcc); 2080 SM_I(sbi)->dcc_info = NULL; 2081 } 2082 2083 return err; 2084 } 2085 2086 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) 2087 { 2088 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 2089 2090 if (!dcc) 2091 return; 2092 2093 f2fs_stop_discard_thread(sbi); 2094 2095 /* 2096 * Recovery can cache discard commands, so in error path of 2097 * fill_super(), it needs to give a chance to handle them. 2098 */ 2099 if (unlikely(atomic_read(&dcc->discard_cmd_cnt))) 2100 f2fs_issue_discard_timeout(sbi); 2101 2102 kfree(dcc); 2103 SM_I(sbi)->dcc_info = NULL; 2104 } 2105 2106 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 2107 { 2108 struct sit_info *sit_i = SIT_I(sbi); 2109 2110 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { 2111 sit_i->dirty_sentries++; 2112 return false; 2113 } 2114 2115 return true; 2116 } 2117 2118 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, 2119 unsigned int segno, int modified) 2120 { 2121 struct seg_entry *se = get_seg_entry(sbi, segno); 2122 2123 se->type = type; 2124 if (modified) 2125 __mark_sit_entry_dirty(sbi, segno); 2126 } 2127 2128 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi, 2129 block_t blkaddr) 2130 { 2131 unsigned int segno = GET_SEGNO(sbi, blkaddr); 2132 2133 if (segno == NULL_SEGNO) 2134 return 0; 2135 return get_seg_entry(sbi, segno)->mtime; 2136 } 2137 2138 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, 2139 unsigned long long old_mtime) 2140 { 2141 struct seg_entry *se; 2142 unsigned int segno = GET_SEGNO(sbi, blkaddr); 2143 unsigned long long ctime = get_mtime(sbi, false); 2144 unsigned long long mtime = old_mtime ? old_mtime : ctime; 2145 2146 if (segno == NULL_SEGNO) 2147 return; 2148 2149 se = get_seg_entry(sbi, segno); 2150 2151 if (!se->mtime) 2152 se->mtime = mtime; 2153 else 2154 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime, 2155 se->valid_blocks + 1); 2156 2157 if (ctime > SIT_I(sbi)->max_mtime) 2158 SIT_I(sbi)->max_mtime = ctime; 2159 } 2160 2161 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) 2162 { 2163 struct seg_entry *se; 2164 unsigned int segno, offset; 2165 long int new_vblocks; 2166 bool exist; 2167 #ifdef CONFIG_F2FS_CHECK_FS 2168 bool mir_exist; 2169 #endif 2170 2171 segno = GET_SEGNO(sbi, blkaddr); 2172 2173 se = get_seg_entry(sbi, segno); 2174 new_vblocks = se->valid_blocks + del; 2175 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 2176 2177 f2fs_bug_on(sbi, (new_vblocks < 0 || 2178 (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); 2179 2180 se->valid_blocks = new_vblocks; 2181 2182 /* Update valid block bitmap */ 2183 if (del > 0) { 2184 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); 2185 #ifdef CONFIG_F2FS_CHECK_FS 2186 mir_exist = f2fs_test_and_set_bit(offset, 2187 se->cur_valid_map_mir); 2188 if (unlikely(exist != mir_exist)) { 2189 f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", 2190 blkaddr, exist); 2191 f2fs_bug_on(sbi, 1); 2192 } 2193 #endif 2194 if (unlikely(exist)) { 2195 f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", 2196 blkaddr); 2197 f2fs_bug_on(sbi, 1); 2198 se->valid_blocks--; 2199 del = 0; 2200 } 2201 2202 if (f2fs_block_unit_discard(sbi) && 2203 !f2fs_test_and_set_bit(offset, se->discard_map)) 2204 sbi->discard_blks--; 2205 2206 /* 2207 * SSR should never reuse block which is checkpointed 2208 * or newly invalidated. 
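 * Note: __next_free_blkoff() skips any block set in either cur_valid_map
 * or ckpt_valid_map, so a block marked here stays off-limits to SSR even
 * if it is invalidated again before the next checkpoint.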
2209 */ 2210 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 2211 if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) 2212 se->ckpt_valid_blocks++; 2213 } 2214 } else { 2215 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); 2216 #ifdef CONFIG_F2FS_CHECK_FS 2217 mir_exist = f2fs_test_and_clear_bit(offset, 2218 se->cur_valid_map_mir); 2219 if (unlikely(exist != mir_exist)) { 2220 f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", 2221 blkaddr, exist); 2222 f2fs_bug_on(sbi, 1); 2223 } 2224 #endif 2225 if (unlikely(!exist)) { 2226 f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", 2227 blkaddr); 2228 f2fs_bug_on(sbi, 1); 2229 se->valid_blocks++; 2230 del = 0; 2231 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2232 /* 2233 * If checkpoints are off, we must not reuse data that 2234 * was used in the previous checkpoint. If it was used 2235 * before, we must track that to know how much space we 2236 * really have. 2237 */ 2238 if (f2fs_test_bit(offset, se->ckpt_valid_map)) { 2239 spin_lock(&sbi->stat_lock); 2240 sbi->unusable_block_count++; 2241 spin_unlock(&sbi->stat_lock); 2242 } 2243 } 2244 2245 if (f2fs_block_unit_discard(sbi) && 2246 f2fs_test_and_clear_bit(offset, se->discard_map)) 2247 sbi->discard_blks++; 2248 } 2249 if (!f2fs_test_bit(offset, se->ckpt_valid_map)) 2250 se->ckpt_valid_blocks += del; 2251 2252 __mark_sit_entry_dirty(sbi, segno); 2253 2254 /* update total number of valid blocks to be written in ckpt area */ 2255 SIT_I(sbi)->written_valid_blocks += del; 2256 2257 if (__is_large_section(sbi)) 2258 get_sec_entry(sbi, segno)->valid_blocks += del; 2259 } 2260 2261 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) 2262 { 2263 unsigned int segno = GET_SEGNO(sbi, addr); 2264 struct sit_info *sit_i = SIT_I(sbi); 2265 2266 f2fs_bug_on(sbi, addr == NULL_ADDR); 2267 if (addr == NEW_ADDR || addr == COMPRESS_ADDR) 2268 return; 2269 2270 invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); 2271 f2fs_invalidate_compress_page(sbi, addr); 2272 2273 /* add it into sit main buffer */ 2274 down_write(&sit_i->sentry_lock); 2275 2276 update_segment_mtime(sbi, addr, 0); 2277 update_sit_entry(sbi, addr, -1); 2278 2279 /* add it into dirty seglist */ 2280 locate_dirty_segment(sbi, segno); 2281 2282 up_write(&sit_i->sentry_lock); 2283 } 2284 2285 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) 2286 { 2287 struct sit_info *sit_i = SIT_I(sbi); 2288 unsigned int segno, offset; 2289 struct seg_entry *se; 2290 bool is_cp = false; 2291 2292 if (!__is_valid_data_blkaddr(blkaddr)) 2293 return true; 2294 2295 down_read(&sit_i->sentry_lock); 2296 2297 segno = GET_SEGNO(sbi, blkaddr); 2298 se = get_seg_entry(sbi, segno); 2299 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 2300 2301 if (f2fs_test_bit(offset, se->ckpt_valid_map)) 2302 is_cp = true; 2303 2304 up_read(&sit_i->sentry_lock); 2305 2306 return is_cp; 2307 } 2308 2309 /* 2310 * This function should be resided under the curseg_mutex lock 2311 */ 2312 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, 2313 struct f2fs_summary *sum) 2314 { 2315 struct curseg_info *curseg = CURSEG_I(sbi, type); 2316 void *addr = curseg->sum_blk; 2317 2318 addr += curseg->next_blkoff * sizeof(struct f2fs_summary); 2319 memcpy(addr, sum, sizeof(struct f2fs_summary)); 2320 } 2321 2322 /* 2323 * Calculate the number of current summary pages for writing 2324 */ 2325 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) 2326 { 2327 int 
valid_sum_count = 0; 2328 int i, sum_in_page; 2329 2330 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 2331 if (sbi->ckpt->alloc_type[i] == SSR) 2332 valid_sum_count += sbi->blocks_per_seg; 2333 else { 2334 if (for_ra) 2335 valid_sum_count += le16_to_cpu( 2336 F2FS_CKPT(sbi)->cur_data_blkoff[i]); 2337 else 2338 valid_sum_count += curseg_blkoff(sbi, i); 2339 } 2340 } 2341 2342 sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE - 2343 SUM_FOOTER_SIZE) / SUMMARY_SIZE; 2344 if (valid_sum_count <= sum_in_page) 2345 return 1; 2346 else if ((valid_sum_count - sum_in_page) <= 2347 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) 2348 return 2; 2349 return 3; 2350 } 2351 2352 /* 2353 * Caller should put this summary page 2354 */ 2355 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) 2356 { 2357 if (unlikely(f2fs_cp_error(sbi))) 2358 return ERR_PTR(-EIO); 2359 return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno)); 2360 } 2361 2362 void f2fs_update_meta_page(struct f2fs_sb_info *sbi, 2363 void *src, block_t blk_addr) 2364 { 2365 struct page *page = f2fs_grab_meta_page(sbi, blk_addr); 2366 2367 memcpy(page_address(page), src, PAGE_SIZE); 2368 set_page_dirty(page); 2369 f2fs_put_page(page, 1); 2370 } 2371 2372 static void write_sum_page(struct f2fs_sb_info *sbi, 2373 struct f2fs_summary_block *sum_blk, block_t blk_addr) 2374 { 2375 f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr); 2376 } 2377 2378 static void write_current_sum_page(struct f2fs_sb_info *sbi, 2379 int type, block_t blk_addr) 2380 { 2381 struct curseg_info *curseg = CURSEG_I(sbi, type); 2382 struct page *page = f2fs_grab_meta_page(sbi, blk_addr); 2383 struct f2fs_summary_block *src = curseg->sum_blk; 2384 struct f2fs_summary_block *dst; 2385 2386 dst = (struct f2fs_summary_block *)page_address(page); 2387 memset(dst, 0, PAGE_SIZE); 2388 2389 mutex_lock(&curseg->curseg_mutex); 2390 2391 down_read(&curseg->journal_rwsem); 2392 memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE); 2393 up_read(&curseg->journal_rwsem); 2394 2395 memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE); 2396 memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE); 2397 2398 mutex_unlock(&curseg->curseg_mutex); 2399 2400 set_page_dirty(page); 2401 f2fs_put_page(page, 1); 2402 } 2403 2404 static int is_next_segment_free(struct f2fs_sb_info *sbi, 2405 struct curseg_info *curseg, int type) 2406 { 2407 unsigned int segno = curseg->segno + 1; 2408 struct free_segmap_info *free_i = FREE_I(sbi); 2409 2410 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) 2411 return !test_bit(segno, free_i->free_segmap); 2412 return 0; 2413 } 2414 2415 /* 2416 * Find a new segment from the free segments bitmap to right order 2417 * This function should be returned with success, otherwise BUG 2418 */ 2419 static void get_new_segment(struct f2fs_sb_info *sbi, 2420 unsigned int *newseg, bool new_sec, int dir) 2421 { 2422 struct free_segmap_info *free_i = FREE_I(sbi); 2423 unsigned int segno, secno, zoneno; 2424 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; 2425 unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); 2426 unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); 2427 unsigned int left_start = hint; 2428 bool init = true; 2429 int go_left = 0; 2430 int i; 2431 2432 spin_lock(&free_i->segmap_lock); 2433 2434 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { 2435 segno = find_next_zero_bit(free_i->free_segmap, 2436 GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); 2437 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) 
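/* a free segment was found inside the currently open section */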
2438 goto got_it; 2439 } 2440 find_other_zone: 2441 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); 2442 if (secno >= MAIN_SECS(sbi)) { 2443 if (dir == ALLOC_RIGHT) { 2444 secno = find_first_zero_bit(free_i->free_secmap, 2445 MAIN_SECS(sbi)); 2446 f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); 2447 } else { 2448 go_left = 1; 2449 left_start = hint - 1; 2450 } 2451 } 2452 if (go_left == 0) 2453 goto skip_left; 2454 2455 while (test_bit(left_start, free_i->free_secmap)) { 2456 if (left_start > 0) { 2457 left_start--; 2458 continue; 2459 } 2460 left_start = find_first_zero_bit(free_i->free_secmap, 2461 MAIN_SECS(sbi)); 2462 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); 2463 break; 2464 } 2465 secno = left_start; 2466 skip_left: 2467 segno = GET_SEG_FROM_SEC(sbi, secno); 2468 zoneno = GET_ZONE_FROM_SEC(sbi, secno); 2469 2470 /* give up on finding another zone */ 2471 if (!init) 2472 goto got_it; 2473 if (sbi->secs_per_zone == 1) 2474 goto got_it; 2475 if (zoneno == old_zoneno) 2476 goto got_it; 2477 if (dir == ALLOC_LEFT) { 2478 if (!go_left && zoneno + 1 >= total_zones) 2479 goto got_it; 2480 if (go_left && zoneno == 0) 2481 goto got_it; 2482 } 2483 for (i = 0; i < NR_CURSEG_TYPE; i++) 2484 if (CURSEG_I(sbi, i)->zone == zoneno) 2485 break; 2486 2487 if (i < NR_CURSEG_TYPE) { 2488 /* zone is in use, try another */ 2489 if (go_left) 2490 hint = zoneno * sbi->secs_per_zone - 1; 2491 else if (zoneno + 1 >= total_zones) 2492 hint = 0; 2493 else 2494 hint = (zoneno + 1) * sbi->secs_per_zone; 2495 init = false; 2496 goto find_other_zone; 2497 } 2498 got_it: 2499 /* set it as dirty segment in free segmap */ 2500 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); 2501 __set_inuse(sbi, segno); 2502 *newseg = segno; 2503 spin_unlock(&free_i->segmap_lock); 2504 } 2505 2506 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) 2507 { 2508 struct curseg_info *curseg = CURSEG_I(sbi, type); 2509 struct summary_footer *sum_footer; 2510 unsigned short seg_type = curseg->seg_type; 2511 2512 curseg->inited = true; 2513 curseg->segno = curseg->next_segno; 2514 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); 2515 curseg->next_blkoff = 0; 2516 curseg->next_segno = NULL_SEGNO; 2517 2518 sum_footer = &(curseg->sum_blk->footer); 2519 memset(sum_footer, 0, sizeof(struct summary_footer)); 2520 2521 sanity_check_seg_type(sbi, seg_type); 2522 2523 if (IS_DATASEG(seg_type)) 2524 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA); 2525 if (IS_NODESEG(seg_type)) 2526 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); 2527 __set_sit_entry_type(sbi, seg_type, curseg->segno, modified); 2528 } 2529 2530 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) 2531 { 2532 struct curseg_info *curseg = CURSEG_I(sbi, type); 2533 unsigned short seg_type = curseg->seg_type; 2534 2535 sanity_check_seg_type(sbi, seg_type); 2536 if (f2fs_need_rand_seg(sbi)) 2537 return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec); 2538 2539 /* if segs_per_sec is larger than 1, we need to keep original policy.
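 * (i.e. keep allocating in place by returning the current segno.)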
*/ 2540 if (__is_large_section(sbi)) 2541 return curseg->segno; 2542 2543 /* inmem log may not locate on any segment after mount */ 2544 if (!curseg->inited) 2545 return 0; 2546 2547 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 2548 return 0; 2549 2550 if (test_opt(sbi, NOHEAP) && 2551 (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))) 2552 return 0; 2553 2554 if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) 2555 return SIT_I(sbi)->last_victim[ALLOC_NEXT]; 2556 2557 /* find segments from 0 to reuse freed segments */ 2558 if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) 2559 return 0; 2560 2561 return curseg->segno; 2562 } 2563 2564 /* 2565 * Allocate a current working segment. 2566 * This function always allocates a free segment in LFS manner. 2567 */ 2568 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) 2569 { 2570 struct curseg_info *curseg = CURSEG_I(sbi, type); 2571 unsigned short seg_type = curseg->seg_type; 2572 unsigned int segno = curseg->segno; 2573 int dir = ALLOC_LEFT; 2574 2575 if (curseg->inited) 2576 write_sum_page(sbi, curseg->sum_blk, 2577 GET_SUM_BLOCK(sbi, segno)); 2578 if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA) 2579 dir = ALLOC_RIGHT; 2580 2581 if (test_opt(sbi, NOHEAP)) 2582 dir = ALLOC_RIGHT; 2583 2584 segno = __get_next_segno(sbi, type); 2585 get_new_segment(sbi, &segno, new_sec, dir); 2586 curseg->next_segno = segno; 2587 reset_curseg(sbi, type, 1); 2588 curseg->alloc_type = LFS; 2589 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) 2590 curseg->fragment_remained_chunk = 2591 prandom_u32_max(sbi->max_fragment_chunk) + 1; 2592 } 2593 2594 static int __next_free_blkoff(struct f2fs_sb_info *sbi, 2595 int segno, block_t start) 2596 { 2597 struct seg_entry *se = get_seg_entry(sbi, segno); 2598 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 2599 unsigned long *target_map = SIT_I(sbi)->tmp_map; 2600 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 2601 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 2602 int i; 2603 2604 for (i = 0; i < entries; i++) 2605 target_map[i] = ckpt_map[i] | cur_map[i]; 2606 2607 return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); 2608 } 2609 2610 /* 2611 * If a segment is written by LFS manner, next block offset is just obtained 2612 * by increasing the current block offset. 
However, if a segment is written by 2613 * SSR manner, next block offset obtained by calling __next_free_blkoff 2614 */ 2615 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, 2616 struct curseg_info *seg) 2617 { 2618 if (seg->alloc_type == SSR) { 2619 seg->next_blkoff = 2620 __next_free_blkoff(sbi, seg->segno, 2621 seg->next_blkoff + 1); 2622 } else { 2623 seg->next_blkoff++; 2624 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) { 2625 /* To allocate block chunks in different sizes, use random number */ 2626 if (--seg->fragment_remained_chunk <= 0) { 2627 seg->fragment_remained_chunk = 2628 prandom_u32_max(sbi->max_fragment_chunk) + 1; 2629 seg->next_blkoff += 2630 prandom_u32_max(sbi->max_fragment_hole) + 1; 2631 } 2632 } 2633 } 2634 } 2635 2636 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) 2637 { 2638 return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg; 2639 } 2640 2641 /* 2642 * This function always allocates a used segment(from dirty seglist) by SSR 2643 * manner, so it should recover the existing segment information of valid blocks 2644 */ 2645 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush) 2646 { 2647 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2648 struct curseg_info *curseg = CURSEG_I(sbi, type); 2649 unsigned int new_segno = curseg->next_segno; 2650 struct f2fs_summary_block *sum_node; 2651 struct page *sum_page; 2652 2653 if (flush) 2654 write_sum_page(sbi, curseg->sum_blk, 2655 GET_SUM_BLOCK(sbi, curseg->segno)); 2656 2657 __set_test_and_inuse(sbi, new_segno); 2658 2659 mutex_lock(&dirty_i->seglist_lock); 2660 __remove_dirty_segment(sbi, new_segno, PRE); 2661 __remove_dirty_segment(sbi, new_segno, DIRTY); 2662 mutex_unlock(&dirty_i->seglist_lock); 2663 2664 reset_curseg(sbi, type, 1); 2665 curseg->alloc_type = SSR; 2666 curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0); 2667 2668 sum_page = f2fs_get_sum_page(sbi, new_segno); 2669 if (IS_ERR(sum_page)) { 2670 /* GC won't be able to use stale summary pages by cp_error */ 2671 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); 2672 return; 2673 } 2674 sum_node = (struct f2fs_summary_block *)page_address(sum_page); 2675 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); 2676 f2fs_put_page(sum_page, 1); 2677 } 2678 2679 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, 2680 int alloc_mode, unsigned long long age); 2681 2682 static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, 2683 int target_type, int alloc_mode, 2684 unsigned long long age) 2685 { 2686 struct curseg_info *curseg = CURSEG_I(sbi, type); 2687 2688 curseg->seg_type = target_type; 2689 2690 if (get_ssr_segment(sbi, type, alloc_mode, age)) { 2691 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); 2692 2693 curseg->seg_type = se->type; 2694 change_curseg(sbi, type, true); 2695 } else { 2696 /* allocate cold segment by default */ 2697 curseg->seg_type = CURSEG_COLD_DATA; 2698 new_curseg(sbi, type, true); 2699 } 2700 stat_inc_seg_type(sbi, curseg); 2701 } 2702 2703 static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) 2704 { 2705 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); 2706 2707 if (!sbi->am.atgc_enabled) 2708 return; 2709 2710 f2fs_down_read(&SM_I(sbi)->curseg_lock); 2711 2712 mutex_lock(&curseg->curseg_mutex); 2713 down_write(&SIT_I(sbi)->sentry_lock); 2714 2715 get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0); 2716 2717 up_write(&SIT_I(sbi)->sentry_lock); 2718 
mutex_unlock(&curseg->curseg_mutex); 2719 2720 f2fs_up_read(&SM_I(sbi)->curseg_lock); 2721 2722 } 2723 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) 2724 { 2725 __f2fs_init_atgc_curseg(sbi); 2726 } 2727 2728 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) 2729 { 2730 struct curseg_info *curseg = CURSEG_I(sbi, type); 2731 2732 mutex_lock(&curseg->curseg_mutex); 2733 if (!curseg->inited) 2734 goto out; 2735 2736 if (get_valid_blocks(sbi, curseg->segno, false)) { 2737 write_sum_page(sbi, curseg->sum_blk, 2738 GET_SUM_BLOCK(sbi, curseg->segno)); 2739 } else { 2740 mutex_lock(&DIRTY_I(sbi)->seglist_lock); 2741 __set_test_and_free(sbi, curseg->segno, true); 2742 mutex_unlock(&DIRTY_I(sbi)->seglist_lock); 2743 } 2744 out: 2745 mutex_unlock(&curseg->curseg_mutex); 2746 } 2747 2748 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi) 2749 { 2750 __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED); 2751 2752 if (sbi->am.atgc_enabled) 2753 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC); 2754 } 2755 2756 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type) 2757 { 2758 struct curseg_info *curseg = CURSEG_I(sbi, type); 2759 2760 mutex_lock(&curseg->curseg_mutex); 2761 if (!curseg->inited) 2762 goto out; 2763 if (get_valid_blocks(sbi, curseg->segno, false)) 2764 goto out; 2765 2766 mutex_lock(&DIRTY_I(sbi)->seglist_lock); 2767 __set_test_and_inuse(sbi, curseg->segno); 2768 mutex_unlock(&DIRTY_I(sbi)->seglist_lock); 2769 out: 2770 mutex_unlock(&curseg->curseg_mutex); 2771 } 2772 2773 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi) 2774 { 2775 __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED); 2776 2777 if (sbi->am.atgc_enabled) 2778 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC); 2779 } 2780 2781 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, 2782 int alloc_mode, unsigned long long age) 2783 { 2784 struct curseg_info *curseg = CURSEG_I(sbi, type); 2785 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; 2786 unsigned segno = NULL_SEGNO; 2787 unsigned short seg_type = curseg->seg_type; 2788 int i, cnt; 2789 bool reversed = false; 2790 2791 sanity_check_seg_type(sbi, seg_type); 2792 2793 /* f2fs_need_SSR() already forces to do this */ 2794 if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { 2795 curseg->next_segno = segno; 2796 return 1; 2797 } 2798 2799 /* For node segments, let's do SSR more intensively */ 2800 if (IS_NODESEG(seg_type)) { 2801 if (seg_type >= CURSEG_WARM_NODE) { 2802 reversed = true; 2803 i = CURSEG_COLD_NODE; 2804 } else { 2805 i = CURSEG_HOT_NODE; 2806 } 2807 cnt = NR_CURSEG_NODE_TYPE; 2808 } else { 2809 if (seg_type >= CURSEG_WARM_DATA) { 2810 reversed = true; 2811 i = CURSEG_COLD_DATA; 2812 } else { 2813 i = CURSEG_HOT_DATA; 2814 } 2815 cnt = NR_CURSEG_DATA_TYPE; 2816 } 2817 2818 for (; cnt-- > 0; reversed ? 
i-- : i++) { 2819 if (i == seg_type) 2820 continue; 2821 if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { 2822 curseg->next_segno = segno; 2823 return 1; 2824 } 2825 } 2826 2827 /* find valid_blocks=0 in dirty list */ 2828 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2829 segno = get_free_segment(sbi); 2830 if (segno != NULL_SEGNO) { 2831 curseg->next_segno = segno; 2832 return 1; 2833 } 2834 } 2835 return 0; 2836 } 2837 2838 /* 2839 * flush out current segment and replace it with new segment 2840 * This function should be returned with success, otherwise BUG 2841 */ 2842 static void allocate_segment_by_default(struct f2fs_sb_info *sbi, 2843 int type, bool force) 2844 { 2845 struct curseg_info *curseg = CURSEG_I(sbi, type); 2846 2847 if (force) 2848 new_curseg(sbi, type, true); 2849 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && 2850 curseg->seg_type == CURSEG_WARM_NODE) 2851 new_curseg(sbi, type, false); 2852 else if (curseg->alloc_type == LFS && 2853 is_next_segment_free(sbi, curseg, type) && 2854 likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 2855 new_curseg(sbi, type, false); 2856 else if (f2fs_need_SSR(sbi) && 2857 get_ssr_segment(sbi, type, SSR, 0)) 2858 change_curseg(sbi, type, true); 2859 else 2860 new_curseg(sbi, type, false); 2861 2862 stat_inc_seg_type(sbi, curseg); 2863 } 2864 2865 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, 2866 unsigned int start, unsigned int end) 2867 { 2868 struct curseg_info *curseg = CURSEG_I(sbi, type); 2869 unsigned int segno; 2870 2871 f2fs_down_read(&SM_I(sbi)->curseg_lock); 2872 mutex_lock(&curseg->curseg_mutex); 2873 down_write(&SIT_I(sbi)->sentry_lock); 2874 2875 segno = CURSEG_I(sbi, type)->segno; 2876 if (segno < start || segno > end) 2877 goto unlock; 2878 2879 if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) 2880 change_curseg(sbi, type, true); 2881 else 2882 new_curseg(sbi, type, true); 2883 2884 stat_inc_seg_type(sbi, curseg); 2885 2886 locate_dirty_segment(sbi, segno); 2887 unlock: 2888 up_write(&SIT_I(sbi)->sentry_lock); 2889 2890 if (segno != curseg->segno) 2891 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", 2892 type, segno, curseg->segno); 2893 2894 mutex_unlock(&curseg->curseg_mutex); 2895 f2fs_up_read(&SM_I(sbi)->curseg_lock); 2896 } 2897 2898 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, 2899 bool new_sec, bool force) 2900 { 2901 struct curseg_info *curseg = CURSEG_I(sbi, type); 2902 unsigned int old_segno; 2903 2904 if (!curseg->inited) 2905 goto alloc; 2906 2907 if (force || curseg->next_blkoff || 2908 get_valid_blocks(sbi, curseg->segno, new_sec)) 2909 goto alloc; 2910 2911 if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) 2912 return; 2913 alloc: 2914 old_segno = curseg->segno; 2915 SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); 2916 locate_dirty_segment(sbi, old_segno); 2917 } 2918 2919 static void __allocate_new_section(struct f2fs_sb_info *sbi, 2920 int type, bool force) 2921 { 2922 __allocate_new_segment(sbi, type, true, force); 2923 } 2924 2925 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) 2926 { 2927 f2fs_down_read(&SM_I(sbi)->curseg_lock); 2928 down_write(&SIT_I(sbi)->sentry_lock); 2929 __allocate_new_section(sbi, type, force); 2930 up_write(&SIT_I(sbi)->sentry_lock); 2931 f2fs_up_read(&SM_I(sbi)->curseg_lock); 2932 } 2933 2934 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) 2935 { 2936 int i; 2937 2938 f2fs_down_read(&SM_I(sbi)->curseg_lock); 2939 
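/* lock ordering matches the other allocation paths: curseg_lock -> (curseg_mutex) -> sentry_lock */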
down_write(&SIT_I(sbi)->sentry_lock); 2940 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) 2941 __allocate_new_segment(sbi, i, false, false); 2942 up_write(&SIT_I(sbi)->sentry_lock); 2943 f2fs_up_read(&SM_I(sbi)->curseg_lock); 2944 } 2945 2946 static const struct segment_allocation default_salloc_ops = { 2947 .allocate_segment = allocate_segment_by_default, 2948 }; 2949 2950 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, 2951 struct cp_control *cpc) 2952 { 2953 __u64 trim_start = cpc->trim_start; 2954 bool has_candidate = false; 2955 2956 down_write(&SIT_I(sbi)->sentry_lock); 2957 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { 2958 if (add_discard_addrs(sbi, cpc, true)) { 2959 has_candidate = true; 2960 break; 2961 } 2962 } 2963 up_write(&SIT_I(sbi)->sentry_lock); 2964 2965 cpc->trim_start = trim_start; 2966 return has_candidate; 2967 } 2968 2969 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, 2970 struct discard_policy *dpolicy, 2971 unsigned int start, unsigned int end) 2972 { 2973 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 2974 struct discard_cmd *prev_dc = NULL, *next_dc = NULL; 2975 struct rb_node **insert_p = NULL, *insert_parent = NULL; 2976 struct discard_cmd *dc; 2977 struct blk_plug plug; 2978 int issued; 2979 unsigned int trimmed = 0; 2980 2981 next: 2982 issued = 0; 2983 2984 mutex_lock(&dcc->cmd_lock); 2985 if (unlikely(dcc->rbtree_check)) 2986 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, 2987 &dcc->root, false)); 2988 2989 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, 2990 NULL, start, 2991 (struct rb_entry **)&prev_dc, 2992 (struct rb_entry **)&next_dc, 2993 &insert_p, &insert_parent, true, NULL); 2994 if (!dc) 2995 dc = next_dc; 2996 2997 blk_start_plug(&plug); 2998 2999 while (dc && dc->lstart <= end) { 3000 struct rb_node *node; 3001 int err = 0; 3002 3003 if (dc->len < dpolicy->granularity) 3004 goto skip; 3005 3006 if (dc->state != D_PREP) { 3007 list_move_tail(&dc->list, &dcc->fstrim_list); 3008 goto skip; 3009 } 3010 3011 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); 3012 3013 if (issued >= dpolicy->max_requests) { 3014 start = dc->lstart + dc->len; 3015 3016 if (err) 3017 __remove_discard_cmd(sbi, dc); 3018 3019 blk_finish_plug(&plug); 3020 mutex_unlock(&dcc->cmd_lock); 3021 trimmed += __wait_all_discard_cmd(sbi, NULL); 3022 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); 3023 goto next; 3024 } 3025 skip: 3026 node = rb_next(&dc->rb_node); 3027 if (err) 3028 __remove_discard_cmd(sbi, dc); 3029 dc = rb_entry_safe(node, struct discard_cmd, rb_node); 3030 3031 if (fatal_signal_pending(current)) 3032 break; 3033 } 3034 3035 blk_finish_plug(&plug); 3036 mutex_unlock(&dcc->cmd_lock); 3037 3038 return trimmed; 3039 } 3040 3041 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) 3042 { 3043 __u64 start = F2FS_BYTES_TO_BLK(range->start); 3044 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; 3045 unsigned int start_segno, end_segno; 3046 block_t start_block, end_block; 3047 struct cp_control cpc; 3048 struct discard_policy dpolicy; 3049 unsigned long long trimmed = 0; 3050 int err = 0; 3051 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); 3052 3053 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) 3054 return -EINVAL; 3055 3056 if (end < MAIN_BLKADDR(sbi)) 3057 goto out; 3058 3059 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { 3060 f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); 3061 return -EFSCORRUPTED; 3062 } 3063 
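/*
 * The byte range from userspace is clamped to the main area and
 * converted to segment numbers; in LFS mode with large sections the
 * range is widened to whole sections (e.g. with 4 segments per
 * section, a request covering segments 5..9 becomes 4..11).
 */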
3064 /* start/end segment number in main_area */ 3065 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); 3066 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : 3067 GET_SEGNO(sbi, end); 3068 if (need_align) { 3069 start_segno = rounddown(start_segno, sbi->segs_per_sec); 3070 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; 3071 } 3072 3073 cpc.reason = CP_DISCARD; 3074 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); 3075 cpc.trim_start = start_segno; 3076 cpc.trim_end = end_segno; 3077 3078 if (sbi->discard_blks == 0) 3079 goto out; 3080 3081 f2fs_down_write(&sbi->gc_lock); 3082 err = f2fs_write_checkpoint(sbi, &cpc); 3083 f2fs_up_write(&sbi->gc_lock); 3084 if (err) 3085 goto out; 3086 3087 /* 3088 * We filed discard candidates, but actually we don't need to wait for 3089 * all of them, since they'll be issued in idle time along with runtime 3090 * discard option. User configuration looks like using runtime discard 3091 * or periodic fstrim instead of it. 3092 */ 3093 if (f2fs_realtime_discard_enable(sbi)) 3094 goto out; 3095 3096 start_block = START_BLOCK(sbi, start_segno); 3097 end_block = START_BLOCK(sbi, end_segno + 1); 3098 3099 __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); 3100 trimmed = __issue_discard_cmd_range(sbi, &dpolicy, 3101 start_block, end_block); 3102 3103 trimmed += __wait_discard_cmd_range(sbi, &dpolicy, 3104 start_block, end_block); 3105 out: 3106 if (!err) 3107 range->len = F2FS_BLK_TO_BYTES(trimmed); 3108 return err; 3109 } 3110 3111 static bool __has_curseg_space(struct f2fs_sb_info *sbi, 3112 struct curseg_info *curseg) 3113 { 3114 return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi, 3115 curseg->segno); 3116 } 3117 3118 int f2fs_rw_hint_to_seg_type(enum rw_hint hint) 3119 { 3120 switch (hint) { 3121 case WRITE_LIFE_SHORT: 3122 return CURSEG_HOT_DATA; 3123 case WRITE_LIFE_EXTREME: 3124 return CURSEG_COLD_DATA; 3125 default: 3126 return CURSEG_WARM_DATA; 3127 } 3128 } 3129 3130 static int __get_segment_type_2(struct f2fs_io_info *fio) 3131 { 3132 if (fio->type == DATA) 3133 return CURSEG_HOT_DATA; 3134 else 3135 return CURSEG_HOT_NODE; 3136 } 3137 3138 static int __get_segment_type_4(struct f2fs_io_info *fio) 3139 { 3140 if (fio->type == DATA) { 3141 struct inode *inode = fio->page->mapping->host; 3142 3143 if (S_ISDIR(inode->i_mode)) 3144 return CURSEG_HOT_DATA; 3145 else 3146 return CURSEG_COLD_DATA; 3147 } else { 3148 if (IS_DNODE(fio->page) && is_cold_node(fio->page)) 3149 return CURSEG_WARM_NODE; 3150 else 3151 return CURSEG_COLD_NODE; 3152 } 3153 } 3154 3155 static int __get_segment_type_6(struct f2fs_io_info *fio) 3156 { 3157 if (fio->type == DATA) { 3158 struct inode *inode = fio->page->mapping->host; 3159 3160 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) 3161 return CURSEG_COLD_DATA_PINNED; 3162 3163 if (page_private_gcing(fio->page)) { 3164 if (fio->sbi->am.atgc_enabled && 3165 (fio->io_type == FS_DATA_IO) && 3166 (fio->sbi->gc_mode != GC_URGENT_HIGH)) 3167 return CURSEG_ALL_DATA_ATGC; 3168 else 3169 return CURSEG_COLD_DATA; 3170 } 3171 if (file_is_cold(inode) || f2fs_need_compress_data(inode)) 3172 return CURSEG_COLD_DATA; 3173 if (file_is_hot(inode) || 3174 is_inode_flag_set(inode, FI_HOT_DATA) || 3175 f2fs_is_cow_file(inode)) 3176 return CURSEG_HOT_DATA; 3177 return f2fs_rw_hint_to_seg_type(inode->i_write_hint); 3178 } else { 3179 if (IS_DNODE(fio->page)) 3180 return is_cold_node(fio->page) ? 
CURSEG_WARM_NODE : 3181 CURSEG_HOT_NODE; 3182 return CURSEG_COLD_NODE; 3183 } 3184 } 3185 3186 static int __get_segment_type(struct f2fs_io_info *fio) 3187 { 3188 int type = 0; 3189 3190 switch (F2FS_OPTION(fio->sbi).active_logs) { 3191 case 2: 3192 type = __get_segment_type_2(fio); 3193 break; 3194 case 4: 3195 type = __get_segment_type_4(fio); 3196 break; 3197 case 6: 3198 type = __get_segment_type_6(fio); 3199 break; 3200 default: 3201 f2fs_bug_on(fio->sbi, true); 3202 } 3203 3204 if (IS_HOT(type)) 3205 fio->temp = HOT; 3206 else if (IS_WARM(type)) 3207 fio->temp = WARM; 3208 else 3209 fio->temp = COLD; 3210 return type; 3211 } 3212 3213 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, 3214 block_t old_blkaddr, block_t *new_blkaddr, 3215 struct f2fs_summary *sum, int type, 3216 struct f2fs_io_info *fio) 3217 { 3218 struct sit_info *sit_i = SIT_I(sbi); 3219 struct curseg_info *curseg = CURSEG_I(sbi, type); 3220 unsigned long long old_mtime; 3221 bool from_gc = (type == CURSEG_ALL_DATA_ATGC); 3222 struct seg_entry *se = NULL; 3223 3224 f2fs_down_read(&SM_I(sbi)->curseg_lock); 3225 3226 mutex_lock(&curseg->curseg_mutex); 3227 down_write(&sit_i->sentry_lock); 3228 3229 if (from_gc) { 3230 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO); 3231 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr)); 3232 sanity_check_seg_type(sbi, se->type); 3233 f2fs_bug_on(sbi, IS_NODESEG(se->type)); 3234 } 3235 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 3236 3237 f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg); 3238 3239 f2fs_wait_discard_bio(sbi, *new_blkaddr); 3240 3241 /* 3242 * __add_sum_entry should be resided under the curseg_mutex 3243 * because, this function updates a summary entry in the 3244 * current summary block. 3245 */ 3246 __add_sum_entry(sbi, type, sum); 3247 3248 __refresh_next_blkoff(sbi, curseg); 3249 3250 stat_inc_block_count(sbi, curseg); 3251 3252 if (from_gc) { 3253 old_mtime = get_segment_mtime(sbi, old_blkaddr); 3254 } else { 3255 update_segment_mtime(sbi, old_blkaddr, 0); 3256 old_mtime = 0; 3257 } 3258 update_segment_mtime(sbi, *new_blkaddr, old_mtime); 3259 3260 /* 3261 * SIT information should be updated before segment allocation, 3262 * since SSR needs latest valid block information. 3263 */ 3264 update_sit_entry(sbi, *new_blkaddr, 1); 3265 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 3266 update_sit_entry(sbi, old_blkaddr, -1); 3267 3268 if (!__has_curseg_space(sbi, curseg)) { 3269 if (from_gc) 3270 get_atssr_segment(sbi, type, se->type, 3271 AT_SSR, se->mtime); 3272 else 3273 sit_i->s_ops->allocate_segment(sbi, type, false); 3274 } 3275 /* 3276 * segment dirty status should be updated after segment allocation, 3277 * so we just need to update status only one time after previous 3278 * segment being closed. 
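 * Both the old and the new block's segments are re-evaluated by
 * locate_dirty_segment() below.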
3279 */ 3280 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 3281 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); 3282 3283 up_write(&sit_i->sentry_lock); 3284 3285 if (page && IS_NODESEG(type)) { 3286 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); 3287 3288 f2fs_inode_chksum_set(sbi, page); 3289 } 3290 3291 if (fio) { 3292 struct f2fs_bio_info *io; 3293 3294 if (F2FS_IO_ALIGNED(sbi)) 3295 fio->retry = false; 3296 3297 INIT_LIST_HEAD(&fio->list); 3298 fio->in_list = true; 3299 io = sbi->write_io[fio->type] + fio->temp; 3300 spin_lock(&io->io_lock); 3301 list_add_tail(&fio->list, &io->io_list); 3302 spin_unlock(&io->io_lock); 3303 } 3304 3305 mutex_unlock(&curseg->curseg_mutex); 3306 3307 f2fs_up_read(&SM_I(sbi)->curseg_lock); 3308 } 3309 3310 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, 3311 block_t blkaddr, unsigned int blkcnt) 3312 { 3313 if (!f2fs_is_multi_device(sbi)) 3314 return; 3315 3316 while (1) { 3317 unsigned int devidx = f2fs_target_device_index(sbi, blkaddr); 3318 unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1; 3319 3320 /* update device state for fsync */ 3321 f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO); 3322 3323 /* update device state for checkpoint */ 3324 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { 3325 spin_lock(&sbi->dev_lock); 3326 f2fs_set_bit(devidx, (char *)&sbi->dirty_device); 3327 spin_unlock(&sbi->dev_lock); 3328 } 3329 3330 if (blkcnt <= blks) 3331 break; 3332 blkcnt -= blks; 3333 blkaddr += blks; 3334 } 3335 } 3336 3337 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) 3338 { 3339 int type = __get_segment_type(fio); 3340 bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); 3341 3342 if (keep_order) 3343 f2fs_down_read(&fio->sbi->io_order_lock); 3344 reallocate: 3345 f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, 3346 &fio->new_blkaddr, sum, type, fio); 3347 if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) { 3348 invalidate_mapping_pages(META_MAPPING(fio->sbi), 3349 fio->old_blkaddr, fio->old_blkaddr); 3350 f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr); 3351 } 3352 3353 /* writeout dirty page into bdev */ 3354 f2fs_submit_page_write(fio); 3355 if (fio->retry) { 3356 fio->old_blkaddr = fio->new_blkaddr; 3357 goto reallocate; 3358 } 3359 3360 f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); 3361 3362 if (keep_order) 3363 f2fs_up_read(&fio->sbi->io_order_lock); 3364 } 3365 3366 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, 3367 enum iostat_type io_type) 3368 { 3369 struct f2fs_io_info fio = { 3370 .sbi = sbi, 3371 .type = META, 3372 .temp = HOT, 3373 .op = REQ_OP_WRITE, 3374 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, 3375 .old_blkaddr = page->index, 3376 .new_blkaddr = page->index, 3377 .page = page, 3378 .encrypted_page = NULL, 3379 .in_list = false, 3380 }; 3381 3382 if (unlikely(page->index >= MAIN_BLKADDR(sbi))) 3383 fio.op_flags &= ~REQ_META; 3384 3385 set_page_writeback(page); 3386 ClearPageError(page); 3387 f2fs_submit_page_write(&fio); 3388 3389 stat_inc_meta_count(sbi, page->index); 3390 f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE); 3391 } 3392 3393 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) 3394 { 3395 struct f2fs_summary sum; 3396 3397 set_summary(&sum, nid, 0, 0); 3398 do_write_page(&sum, fio); 3399 3400 f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE); 3401 } 3402 3403 void 
f2fs_outplace_write_data(struct dnode_of_data *dn, 3404 struct f2fs_io_info *fio) 3405 { 3406 struct f2fs_sb_info *sbi = fio->sbi; 3407 struct f2fs_summary sum; 3408 3409 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); 3410 set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); 3411 do_write_page(&sum, fio); 3412 f2fs_update_data_blkaddr(dn, fio->new_blkaddr); 3413 3414 f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE); 3415 } 3416 3417 int f2fs_inplace_write_data(struct f2fs_io_info *fio) 3418 { 3419 int err; 3420 struct f2fs_sb_info *sbi = fio->sbi; 3421 unsigned int segno; 3422 3423 fio->new_blkaddr = fio->old_blkaddr; 3424 /* i/o temperature is needed for passing down write hints */ 3425 __get_segment_type(fio); 3426 3427 segno = GET_SEGNO(sbi, fio->new_blkaddr); 3428 3429 if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { 3430 set_sbi_flag(sbi, SBI_NEED_FSCK); 3431 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", 3432 __func__, segno); 3433 err = -EFSCORRUPTED; 3434 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE); 3435 goto drop_bio; 3436 } 3437 3438 if (f2fs_cp_error(sbi)) { 3439 err = -EIO; 3440 goto drop_bio; 3441 } 3442 3443 if (fio->post_read) 3444 invalidate_mapping_pages(META_MAPPING(sbi), 3445 fio->new_blkaddr, fio->new_blkaddr); 3446 3447 stat_inc_inplace_blocks(fio->sbi); 3448 3449 if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE))) 3450 err = f2fs_merge_page_bio(fio); 3451 else 3452 err = f2fs_submit_page_bio(fio); 3453 if (!err) { 3454 f2fs_update_device_state(fio->sbi, fio->ino, 3455 fio->new_blkaddr, 1); 3456 f2fs_update_iostat(fio->sbi, fio->page->mapping->host, 3457 fio->io_type, F2FS_BLKSIZE); 3458 } 3459 3460 return err; 3461 drop_bio: 3462 if (fio->bio && *(fio->bio)) { 3463 struct bio *bio = *(fio->bio); 3464 3465 bio->bi_status = BLK_STS_IOERR; 3466 bio_endio(bio); 3467 *(fio->bio) = NULL; 3468 } 3469 return err; 3470 } 3471 3472 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, 3473 unsigned int segno) 3474 { 3475 int i; 3476 3477 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { 3478 if (CURSEG_I(sbi, i)->segno == segno) 3479 break; 3480 } 3481 return i; 3482 } 3483 3484 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 3485 block_t old_blkaddr, block_t new_blkaddr, 3486 bool recover_curseg, bool recover_newaddr, 3487 bool from_gc) 3488 { 3489 struct sit_info *sit_i = SIT_I(sbi); 3490 struct curseg_info *curseg; 3491 unsigned int segno, old_cursegno; 3492 struct seg_entry *se; 3493 int type; 3494 unsigned short old_blkoff; 3495 unsigned char old_alloc_type; 3496 3497 segno = GET_SEGNO(sbi, new_blkaddr); 3498 se = get_seg_entry(sbi, segno); 3499 type = se->type; 3500 3501 f2fs_down_write(&SM_I(sbi)->curseg_lock); 3502 3503 if (!recover_curseg) { 3504 /* for recovery flow */ 3505 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { 3506 if (old_blkaddr == NULL_ADDR) 3507 type = CURSEG_COLD_DATA; 3508 else 3509 type = CURSEG_WARM_DATA; 3510 } 3511 } else { 3512 if (IS_CURSEG(sbi, segno)) { 3513 /* se->type is volatile as SSR allocation */ 3514 type = __f2fs_get_curseg(sbi, segno); 3515 f2fs_bug_on(sbi, type == NO_CHECK_TYPE); 3516 } else { 3517 type = CURSEG_WARM_DATA; 3518 } 3519 } 3520 3521 f2fs_bug_on(sbi, !IS_DATASEG(type)); 3522 curseg = CURSEG_I(sbi, type); 3523 3524 mutex_lock(&curseg->curseg_mutex); 3525 down_write(&sit_i->sentry_lock); 3526 3527 old_cursegno = curseg->segno; 3528 old_blkoff = curseg->next_blkoff; 3529 old_alloc_type = curseg->alloc_type; 
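/*
 * The curseg position and allocation type saved above are restored
 * at the end of this function when recover_curseg is set.
 */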
3530 3531 /* change the current segment */ 3532 if (segno != curseg->segno) { 3533 curseg->next_segno = segno; 3534 change_curseg(sbi, type, true); 3535 } 3536 3537 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); 3538 __add_sum_entry(sbi, type, sum); 3539 3540 if (!recover_curseg || recover_newaddr) { 3541 if (!from_gc) 3542 update_segment_mtime(sbi, new_blkaddr, 0); 3543 update_sit_entry(sbi, new_blkaddr, 1); 3544 } 3545 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { 3546 invalidate_mapping_pages(META_MAPPING(sbi), 3547 old_blkaddr, old_blkaddr); 3548 f2fs_invalidate_compress_page(sbi, old_blkaddr); 3549 if (!from_gc) 3550 update_segment_mtime(sbi, old_blkaddr, 0); 3551 update_sit_entry(sbi, old_blkaddr, -1); 3552 } 3553 3554 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 3555 locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); 3556 3557 locate_dirty_segment(sbi, old_cursegno); 3558 3559 if (recover_curseg) { 3560 if (old_cursegno != curseg->segno) { 3561 curseg->next_segno = old_cursegno; 3562 change_curseg(sbi, type, true); 3563 } 3564 curseg->next_blkoff = old_blkoff; 3565 curseg->alloc_type = old_alloc_type; 3566 } 3567 3568 up_write(&sit_i->sentry_lock); 3569 mutex_unlock(&curseg->curseg_mutex); 3570 f2fs_up_write(&SM_I(sbi)->curseg_lock); 3571 } 3572 3573 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, 3574 block_t old_addr, block_t new_addr, 3575 unsigned char version, bool recover_curseg, 3576 bool recover_newaddr) 3577 { 3578 struct f2fs_summary sum; 3579 3580 set_summary(&sum, dn->nid, dn->ofs_in_node, version); 3581 3582 f2fs_do_replace_block(sbi, &sum, old_addr, new_addr, 3583 recover_curseg, recover_newaddr, false); 3584 3585 f2fs_update_data_blkaddr(dn, new_addr); 3586 } 3587 3588 void f2fs_wait_on_page_writeback(struct page *page, 3589 enum page_type type, bool ordered, bool locked) 3590 { 3591 if (PageWriteback(page)) { 3592 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 3593 3594 /* submit cached LFS IO */ 3595 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); 3596 /* submit cached IPU IO */ 3597 f2fs_submit_merged_ipu_write(sbi, NULL, page); 3598 if (ordered) { 3599 wait_on_page_writeback(page); 3600 f2fs_bug_on(sbi, locked && PageWriteback(page)); 3601 } else { 3602 wait_for_stable_page(page); 3603 } 3604 } 3605 } 3606 3607 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) 3608 { 3609 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3610 struct page *cpage; 3611 3612 if (!f2fs_post_read_required(inode)) 3613 return; 3614 3615 if (!__is_valid_data_blkaddr(blkaddr)) 3616 return; 3617 3618 cpage = find_lock_page(META_MAPPING(sbi), blkaddr); 3619 if (cpage) { 3620 f2fs_wait_on_page_writeback(cpage, DATA, true, true); 3621 f2fs_put_page(cpage, 1); 3622 } 3623 } 3624 3625 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, 3626 block_t len) 3627 { 3628 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3629 block_t i; 3630 3631 if (!f2fs_post_read_required(inode)) 3632 return; 3633 3634 for (i = 0; i < len; i++) 3635 f2fs_wait_on_block_writeback(inode, blkaddr + i); 3636 3637 invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1); 3638 } 3639 3640 static int read_compacted_summaries(struct f2fs_sb_info *sbi) 3641 { 3642 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 3643 struct curseg_info *seg_i; 3644 unsigned char *kaddr; 3645 struct page *page; 3646 block_t start; 3647 int i, j, offset; 3648 3649 start = start_sum_block(sbi); 3650 3651 page =
f2fs_get_meta_page(sbi, start++); 3652 if (IS_ERR(page)) 3653 return PTR_ERR(page); 3654 kaddr = (unsigned char *)page_address(page); 3655 3656 /* Step 1: restore nat cache */ 3657 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); 3658 memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE); 3659 3660 /* Step 2: restore sit cache */ 3661 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); 3662 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE); 3663 offset = 2 * SUM_JOURNAL_SIZE; 3664 3665 /* Step 3: restore summary entries */ 3666 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 3667 unsigned short blk_off; 3668 unsigned int segno; 3669 3670 seg_i = CURSEG_I(sbi, i); 3671 segno = le32_to_cpu(ckpt->cur_data_segno[i]); 3672 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); 3673 seg_i->next_segno = segno; 3674 reset_curseg(sbi, i, 0); 3675 seg_i->alloc_type = ckpt->alloc_type[i]; 3676 seg_i->next_blkoff = blk_off; 3677 3678 if (seg_i->alloc_type == SSR) 3679 blk_off = sbi->blocks_per_seg; 3680 3681 for (j = 0; j < blk_off; j++) { 3682 struct f2fs_summary *s; 3683 3684 s = (struct f2fs_summary *)(kaddr + offset); 3685 seg_i->sum_blk->entries[j] = *s; 3686 offset += SUMMARY_SIZE; 3687 if (offset + SUMMARY_SIZE <= PAGE_SIZE - 3688 SUM_FOOTER_SIZE) 3689 continue; 3690 3691 f2fs_put_page(page, 1); 3692 page = NULL; 3693 3694 page = f2fs_get_meta_page(sbi, start++); 3695 if (IS_ERR(page)) 3696 return PTR_ERR(page); 3697 kaddr = (unsigned char *)page_address(page); 3698 offset = 0; 3699 } 3700 } 3701 f2fs_put_page(page, 1); 3702 return 0; 3703 } 3704 3705 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) 3706 { 3707 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 3708 struct f2fs_summary_block *sum; 3709 struct curseg_info *curseg; 3710 struct page *new; 3711 unsigned short blk_off; 3712 unsigned int segno = 0; 3713 block_t blk_addr = 0; 3714 int err = 0; 3715 3716 /* get segment number and block addr */ 3717 if (IS_DATASEG(type)) { 3718 segno = le32_to_cpu(ckpt->cur_data_segno[type]); 3719 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - 3720 CURSEG_HOT_DATA]); 3721 if (__exist_node_summaries(sbi)) 3722 blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type); 3723 else 3724 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); 3725 } else { 3726 segno = le32_to_cpu(ckpt->cur_node_segno[type - 3727 CURSEG_HOT_NODE]); 3728 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - 3729 CURSEG_HOT_NODE]); 3730 if (__exist_node_summaries(sbi)) 3731 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, 3732 type - CURSEG_HOT_NODE); 3733 else 3734 blk_addr = GET_SUM_BLOCK(sbi, segno); 3735 } 3736 3737 new = f2fs_get_meta_page(sbi, blk_addr); 3738 if (IS_ERR(new)) 3739 return PTR_ERR(new); 3740 sum = (struct f2fs_summary_block *)page_address(new); 3741 3742 if (IS_NODESEG(type)) { 3743 if (__exist_node_summaries(sbi)) { 3744 struct f2fs_summary *ns = &sum->entries[0]; 3745 int i; 3746 3747 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { 3748 ns->version = 0; 3749 ns->ofs_in_node = 0; 3750 } 3751 } else { 3752 err = f2fs_restore_node_summary(sbi, segno, sum); 3753 if (err) 3754 goto out; 3755 } 3756 } 3757 3758 /* set uncompleted segment to curseg */ 3759 curseg = CURSEG_I(sbi, type); 3760 mutex_lock(&curseg->curseg_mutex); 3761 3762 /* update journal info */ 3763 down_write(&curseg->journal_rwsem); 3764 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE); 3765 up_write(&curseg->journal_rwsem); 3766 3767 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE); 3768 
memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE); 3769 curseg->next_segno = segno; 3770 reset_curseg(sbi, type, 0); 3771 curseg->alloc_type = ckpt->alloc_type[type]; 3772 curseg->next_blkoff = blk_off; 3773 mutex_unlock(&curseg->curseg_mutex); 3774 out: 3775 f2fs_put_page(new, 1); 3776 return err; 3777 } 3778 3779 static int restore_curseg_summaries(struct f2fs_sb_info *sbi) 3780 { 3781 struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal; 3782 struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal; 3783 int type = CURSEG_HOT_DATA; 3784 int err; 3785 3786 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { 3787 int npages = f2fs_npages_for_summary_flush(sbi, true); 3788 3789 if (npages >= 2) 3790 f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages, 3791 META_CP, true); 3792 3793 /* restore for compacted data summary */ 3794 err = read_compacted_summaries(sbi); 3795 if (err) 3796 return err; 3797 type = CURSEG_HOT_NODE; 3798 } 3799 3800 if (__exist_node_summaries(sbi)) 3801 f2fs_ra_meta_pages(sbi, 3802 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type), 3803 NR_CURSEG_PERSIST_TYPE - type, META_CP, true); 3804 3805 for (; type <= CURSEG_COLD_NODE; type++) { 3806 err = read_normal_summaries(sbi, type); 3807 if (err) 3808 return err; 3809 } 3810 3811 /* sanity check for summary blocks */ 3812 if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || 3813 sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { 3814 f2fs_err(sbi, "invalid journal entries nats %u sits %u", 3815 nats_in_cursum(nat_j), sits_in_cursum(sit_j)); 3816 return -EINVAL; 3817 } 3818 3819 return 0; 3820 } 3821 3822 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) 3823 { 3824 struct page *page; 3825 unsigned char *kaddr; 3826 struct f2fs_summary *summary; 3827 struct curseg_info *seg_i; 3828 int written_size = 0; 3829 int i, j; 3830 3831 page = f2fs_grab_meta_page(sbi, blkaddr++); 3832 kaddr = (unsigned char *)page_address(page); 3833 memset(kaddr, 0, PAGE_SIZE); 3834 3835 /* Step 1: write nat cache */ 3836 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); 3837 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE); 3838 written_size += SUM_JOURNAL_SIZE; 3839 3840 /* Step 2: write sit cache */ 3841 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); 3842 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE); 3843 written_size += SUM_JOURNAL_SIZE; 3844 3845 /* Step 3: write summary entries */ 3846 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 3847 unsigned short blkoff; 3848 3849 seg_i = CURSEG_I(sbi, i); 3850 if (sbi->ckpt->alloc_type[i] == SSR) 3851 blkoff = sbi->blocks_per_seg; 3852 else 3853 blkoff = curseg_blkoff(sbi, i); 3854 3855 for (j = 0; j < blkoff; j++) { 3856 if (!page) { 3857 page = f2fs_grab_meta_page(sbi, blkaddr++); 3858 kaddr = (unsigned char *)page_address(page); 3859 memset(kaddr, 0, PAGE_SIZE); 3860 written_size = 0; 3861 } 3862 summary = (struct f2fs_summary *)(kaddr + written_size); 3863 *summary = seg_i->sum_blk->entries[j]; 3864 written_size += SUMMARY_SIZE; 3865 3866 if (written_size + SUMMARY_SIZE <= PAGE_SIZE - 3867 SUM_FOOTER_SIZE) 3868 continue; 3869 3870 set_page_dirty(page); 3871 f2fs_put_page(page, 1); 3872 page = NULL; 3873 } 3874 } 3875 if (page) { 3876 set_page_dirty(page); 3877 f2fs_put_page(page, 1); 3878 } 3879 } 3880 3881 static void write_normal_summaries(struct f2fs_sb_info *sbi, 3882 block_t blkaddr, int type) 3883 { 3884 int i, end; 3885 3886 if (IS_DATASEG(type)) 3887 end = type + NR_CURSEG_DATA_TYPE; 3888 else 3889 end = type + 
NR_CURSEG_NODE_TYPE; 3890 3891 for (i = type; i < end; i++) 3892 write_current_sum_page(sbi, i, blkaddr + (i - type)); 3893 } 3894 3895 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 3896 { 3897 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) 3898 write_compacted_summaries(sbi, start_blk); 3899 else 3900 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); 3901 } 3902 3903 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 3904 { 3905 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); 3906 } 3907 3908 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, 3909 unsigned int val, int alloc) 3910 { 3911 int i; 3912 3913 if (type == NAT_JOURNAL) { 3914 for (i = 0; i < nats_in_cursum(journal); i++) { 3915 if (le32_to_cpu(nid_in_journal(journal, i)) == val) 3916 return i; 3917 } 3918 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL)) 3919 return update_nats_in_cursum(journal, 1); 3920 } else if (type == SIT_JOURNAL) { 3921 for (i = 0; i < sits_in_cursum(journal); i++) 3922 if (le32_to_cpu(segno_in_journal(journal, i)) == val) 3923 return i; 3924 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL)) 3925 return update_sits_in_cursum(journal, 1); 3926 } 3927 return -1; 3928 } 3929 3930 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, 3931 unsigned int segno) 3932 { 3933 return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno)); 3934 } 3935 3936 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, 3937 unsigned int start) 3938 { 3939 struct sit_info *sit_i = SIT_I(sbi); 3940 struct page *page; 3941 pgoff_t src_off, dst_off; 3942 3943 src_off = current_sit_addr(sbi, start); 3944 dst_off = next_sit_addr(sbi, src_off); 3945 3946 page = f2fs_grab_meta_page(sbi, dst_off); 3947 seg_info_to_sit_page(sbi, page, start); 3948 3949 set_page_dirty(page); 3950 set_to_next_sit(sit_i, start); 3951 3952 return page; 3953 } 3954 3955 static struct sit_entry_set *grab_sit_entry_set(void) 3956 { 3957 struct sit_entry_set *ses = 3958 f2fs_kmem_cache_alloc(sit_entry_set_slab, 3959 GFP_NOFS, true, NULL); 3960 3961 ses->entry_cnt = 0; 3962 INIT_LIST_HEAD(&ses->set_list); 3963 return ses; 3964 } 3965 3966 static void release_sit_entry_set(struct sit_entry_set *ses) 3967 { 3968 list_del(&ses->set_list); 3969 kmem_cache_free(sit_entry_set_slab, ses); 3970 } 3971 3972 static void adjust_sit_entry_set(struct sit_entry_set *ses, 3973 struct list_head *head) 3974 { 3975 struct sit_entry_set *next = ses; 3976 3977 if (list_is_last(&ses->set_list, head)) 3978 return; 3979 3980 list_for_each_entry_continue(next, head, set_list) 3981 if (ses->entry_cnt <= next->entry_cnt) { 3982 list_move_tail(&ses->set_list, &next->set_list); 3983 return; 3984 } 3985 3986 list_move_tail(&ses->set_list, head); 3987 } 3988 3989 static void add_sit_entry(unsigned int segno, struct list_head *head) 3990 { 3991 struct sit_entry_set *ses; 3992 unsigned int start_segno = START_SEGNO(segno); 3993 3994 list_for_each_entry(ses, head, set_list) { 3995 if (ses->start_segno == start_segno) { 3996 ses->entry_cnt++; 3997 adjust_sit_entry_set(ses, head); 3998 return; 3999 } 4000 } 4001 4002 ses = grab_sit_entry_set(); 4003 4004 ses->start_segno = start_segno; 4005 ses->entry_cnt++; 4006 list_add(&ses->set_list, head); 4007 } 4008 4009 static void add_sits_in_set(struct f2fs_sb_info *sbi) 4010 { 4011 struct f2fs_sm_info *sm_info = SM_I(sbi); 4012 struct list_head *set_list = &sm_info->sit_entry_set; 4013 unsigned long *bitmap = 
				SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
		add_sit_entry(segno, set_list);
}

static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_write(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int segno;
		bool dirtied;

		segno = le32_to_cpu(segno_in_journal(journal, i));
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
	}
	update_sits_in_cursum(journal, -i);
	up_write(&curseg->journal_rwsem);
}

/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 */
void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
	struct seg_entry *se;

	down_write(&sit_i->sentry_lock);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * add and account the sit entries of the dirty bitmap in the
	 * sit entry set temporarily
	 */
	add_sits_in_set(sbi);

	/*
	 * if there is not enough space in the journal to store the dirty sit
	 * entries, remove all entries from the journal and add and account
	 * them in the sit entry set.
	 */
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
								!to_journal)
		remove_sits_in_journal(sbi);

	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
4080 */ 4081 list_for_each_entry_safe(ses, tmp, head, set_list) { 4082 struct page *page = NULL; 4083 struct f2fs_sit_block *raw_sit = NULL; 4084 unsigned int start_segno = ses->start_segno; 4085 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, 4086 (unsigned long)MAIN_SEGS(sbi)); 4087 unsigned int segno = start_segno; 4088 4089 if (to_journal && 4090 !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL)) 4091 to_journal = false; 4092 4093 if (to_journal) { 4094 down_write(&curseg->journal_rwsem); 4095 } else { 4096 page = get_next_sit_page(sbi, start_segno); 4097 raw_sit = page_address(page); 4098 } 4099 4100 /* flush dirty sit entries in region of current sit set */ 4101 for_each_set_bit_from(segno, bitmap, end) { 4102 int offset, sit_offset; 4103 4104 se = get_seg_entry(sbi, segno); 4105 #ifdef CONFIG_F2FS_CHECK_FS 4106 if (memcmp(se->cur_valid_map, se->cur_valid_map_mir, 4107 SIT_VBLOCK_MAP_SIZE)) 4108 f2fs_bug_on(sbi, 1); 4109 #endif 4110 4111 /* add discard candidates */ 4112 if (!(cpc->reason & CP_DISCARD)) { 4113 cpc->trim_start = segno; 4114 add_discard_addrs(sbi, cpc, false); 4115 } 4116 4117 if (to_journal) { 4118 offset = f2fs_lookup_journal_in_cursum(journal, 4119 SIT_JOURNAL, segno, 1); 4120 f2fs_bug_on(sbi, offset < 0); 4121 segno_in_journal(journal, offset) = 4122 cpu_to_le32(segno); 4123 seg_info_to_raw_sit(se, 4124 &sit_in_journal(journal, offset)); 4125 check_block_count(sbi, segno, 4126 &sit_in_journal(journal, offset)); 4127 } else { 4128 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); 4129 seg_info_to_raw_sit(se, 4130 &raw_sit->entries[sit_offset]); 4131 check_block_count(sbi, segno, 4132 &raw_sit->entries[sit_offset]); 4133 } 4134 4135 __clear_bit(segno, bitmap); 4136 sit_i->dirty_sentries--; 4137 ses->entry_cnt--; 4138 } 4139 4140 if (to_journal) 4141 up_write(&curseg->journal_rwsem); 4142 else 4143 f2fs_put_page(page, 1); 4144 4145 f2fs_bug_on(sbi, ses->entry_cnt); 4146 release_sit_entry_set(ses); 4147 } 4148 4149 f2fs_bug_on(sbi, !list_empty(head)); 4150 f2fs_bug_on(sbi, sit_i->dirty_sentries); 4151 out: 4152 if (cpc->reason & CP_DISCARD) { 4153 __u64 trim_start = cpc->trim_start; 4154 4155 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) 4156 add_discard_addrs(sbi, cpc, false); 4157 4158 cpc->trim_start = trim_start; 4159 } 4160 up_write(&sit_i->sentry_lock); 4161 4162 set_prefree_as_free_segments(sbi); 4163 } 4164 4165 static int build_sit_info(struct f2fs_sb_info *sbi) 4166 { 4167 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 4168 struct sit_info *sit_i; 4169 unsigned int sit_segs, start; 4170 char *src_bitmap, *bitmap; 4171 unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size; 4172 unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 
							1 : 0;

	/* allocate memory for SIT information */
	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

	sit_i->sentries =
		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
					      MAIN_SEGS(sbi)),
			      GFP_KERNEL);
	if (!sit_i->sentries)
		return -ENOMEM;

	main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
								GFP_KERNEL);
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
#else
	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
#endif
	sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
	if (!sit_i->bitmap)
		return -ENOMEM;

	bitmap = sit_i->bitmap;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		sit_i->sentries[start].cur_valid_map = bitmap;
		bitmap += SIT_VBLOCK_MAP_SIZE;

		sit_i->sentries[start].ckpt_valid_map = bitmap;
		bitmap += SIT_VBLOCK_MAP_SIZE;

#ifdef CONFIG_F2FS_CHECK_FS
		sit_i->sentries[start].cur_valid_map_mir = bitmap;
		bitmap += SIT_VBLOCK_MAP_SIZE;
#endif

		if (discard_map) {
			sit_i->sentries[start].discard_map = bitmap;
			bitmap += SIT_VBLOCK_MAP_SIZE;
		}
	}

	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
	if (!sit_i->tmp_map)
		return -ENOMEM;

	if (__is_large_section(sbi)) {
		sit_i->sec_entries =
			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
						      MAIN_SECS(sbi)),
				      GFP_KERNEL);
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related to SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* setup SIT bitmap from checkpoint pack */
	sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

	sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap)
		return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
	sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
					sit_bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap_mir)
		return -ENOMEM;

	sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
					main_bitmap_size, GFP_KERNEL);
	if (!sit_i->invalid_segmap)
		return -ENOMEM;
#endif

	/* init SIT information */
	sit_i->s_ops = &default_salloc_ops;

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
	sit_i->written_valid_blocks = 0;
	sit_i->bitmap_size = sit_bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
	sit_i->mounted_time = ktime_get_boottime_seconds();
	init_rwsem(&sit_i->sentry_lock);
	return 0;
}

static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
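
	/*
	 * free_segmap carries one bit per main segment and free_secmap one
	 * bit per section.  Both are filled with 1s ("in use") just below;
	 * init_free_segmap() clears the bits of segments that turn out to
	 * hold no valid blocks once the SIT entries have been loaded.
	 */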
free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL); 4288 if (!free_i->free_segmap) 4289 return -ENOMEM; 4290 4291 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4292 free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL); 4293 if (!free_i->free_secmap) 4294 return -ENOMEM; 4295 4296 /* set all segments as dirty temporarily */ 4297 memset(free_i->free_segmap, 0xff, bitmap_size); 4298 memset(free_i->free_secmap, 0xff, sec_bitmap_size); 4299 4300 /* init free segmap information */ 4301 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); 4302 free_i->free_segments = 0; 4303 free_i->free_sections = 0; 4304 spin_lock_init(&free_i->segmap_lock); 4305 return 0; 4306 } 4307 4308 static int build_curseg(struct f2fs_sb_info *sbi) 4309 { 4310 struct curseg_info *array; 4311 int i; 4312 4313 array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, 4314 sizeof(*array)), GFP_KERNEL); 4315 if (!array) 4316 return -ENOMEM; 4317 4318 SM_I(sbi)->curseg_array = array; 4319 4320 for (i = 0; i < NO_CHECK_TYPE; i++) { 4321 mutex_init(&array[i].curseg_mutex); 4322 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL); 4323 if (!array[i].sum_blk) 4324 return -ENOMEM; 4325 init_rwsem(&array[i].journal_rwsem); 4326 array[i].journal = f2fs_kzalloc(sbi, 4327 sizeof(struct f2fs_journal), GFP_KERNEL); 4328 if (!array[i].journal) 4329 return -ENOMEM; 4330 if (i < NR_PERSISTENT_LOG) 4331 array[i].seg_type = CURSEG_HOT_DATA + i; 4332 else if (i == CURSEG_COLD_DATA_PINNED) 4333 array[i].seg_type = CURSEG_COLD_DATA; 4334 else if (i == CURSEG_ALL_DATA_ATGC) 4335 array[i].seg_type = CURSEG_COLD_DATA; 4336 array[i].segno = NULL_SEGNO; 4337 array[i].next_blkoff = 0; 4338 array[i].inited = false; 4339 } 4340 return restore_curseg_summaries(sbi); 4341 } 4342 4343 static int build_sit_entries(struct f2fs_sb_info *sbi) 4344 { 4345 struct sit_info *sit_i = SIT_I(sbi); 4346 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 4347 struct f2fs_journal *journal = curseg->journal; 4348 struct seg_entry *se; 4349 struct f2fs_sit_entry sit; 4350 int sit_blk_cnt = SIT_BLK_CNT(sbi); 4351 unsigned int i, start, end; 4352 unsigned int readed, start_blk = 0; 4353 int err = 0; 4354 block_t sit_valid_blocks[2] = {0, 0}; 4355 4356 do { 4357 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS, 4358 META_SIT, true); 4359 4360 start = start_blk * sit_i->sents_per_block; 4361 end = (start_blk + readed) * sit_i->sents_per_block; 4362 4363 for (; start < end && start < MAIN_SEGS(sbi); start++) { 4364 struct f2fs_sit_block *sit_blk; 4365 struct page *page; 4366 4367 se = &sit_i->sentries[start]; 4368 page = get_current_sit_page(sbi, start); 4369 if (IS_ERR(page)) 4370 return PTR_ERR(page); 4371 sit_blk = (struct f2fs_sit_block *)page_address(page); 4372 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; 4373 f2fs_put_page(page, 1); 4374 4375 err = check_block_count(sbi, start, &sit); 4376 if (err) 4377 return err; 4378 seg_info_from_raw_sit(se, &sit); 4379 4380 if (se->type >= NR_PERSISTENT_LOG) { 4381 f2fs_err(sbi, "Invalid segment type: %u, segno: %u", 4382 se->type, start); 4383 f2fs_handle_error(sbi, 4384 ERROR_INCONSISTENT_SUM_TYPE); 4385 return -EFSCORRUPTED; 4386 } 4387 4388 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; 4389 4390 if (f2fs_block_unit_discard(sbi)) { 4391 /* build discard map only one time */ 4392 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 4393 memset(se->discard_map, 0xff, 4394 SIT_VBLOCK_MAP_SIZE); 4395 } else { 4396 memcpy(se->discard_map, 4397 
se->cur_valid_map, 4398 SIT_VBLOCK_MAP_SIZE); 4399 sbi->discard_blks += 4400 sbi->blocks_per_seg - 4401 se->valid_blocks; 4402 } 4403 } 4404 4405 if (__is_large_section(sbi)) 4406 get_sec_entry(sbi, start)->valid_blocks += 4407 se->valid_blocks; 4408 } 4409 start_blk += readed; 4410 } while (start_blk < sit_blk_cnt); 4411 4412 down_read(&curseg->journal_rwsem); 4413 for (i = 0; i < sits_in_cursum(journal); i++) { 4414 unsigned int old_valid_blocks; 4415 4416 start = le32_to_cpu(segno_in_journal(journal, i)); 4417 if (start >= MAIN_SEGS(sbi)) { 4418 f2fs_err(sbi, "Wrong journal entry on segno %u", 4419 start); 4420 err = -EFSCORRUPTED; 4421 f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL); 4422 break; 4423 } 4424 4425 se = &sit_i->sentries[start]; 4426 sit = sit_in_journal(journal, i); 4427 4428 old_valid_blocks = se->valid_blocks; 4429 4430 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks; 4431 4432 err = check_block_count(sbi, start, &sit); 4433 if (err) 4434 break; 4435 seg_info_from_raw_sit(se, &sit); 4436 4437 if (se->type >= NR_PERSISTENT_LOG) { 4438 f2fs_err(sbi, "Invalid segment type: %u, segno: %u", 4439 se->type, start); 4440 err = -EFSCORRUPTED; 4441 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE); 4442 break; 4443 } 4444 4445 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; 4446 4447 if (f2fs_block_unit_discard(sbi)) { 4448 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 4449 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); 4450 } else { 4451 memcpy(se->discard_map, se->cur_valid_map, 4452 SIT_VBLOCK_MAP_SIZE); 4453 sbi->discard_blks += old_valid_blocks; 4454 sbi->discard_blks -= se->valid_blocks; 4455 } 4456 } 4457 4458 if (__is_large_section(sbi)) { 4459 get_sec_entry(sbi, start)->valid_blocks += 4460 se->valid_blocks; 4461 get_sec_entry(sbi, start)->valid_blocks -= 4462 old_valid_blocks; 4463 } 4464 } 4465 up_read(&curseg->journal_rwsem); 4466 4467 if (err) 4468 return err; 4469 4470 if (sit_valid_blocks[NODE] != valid_node_count(sbi)) { 4471 f2fs_err(sbi, "SIT is corrupted node# %u vs %u", 4472 sit_valid_blocks[NODE], valid_node_count(sbi)); 4473 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT); 4474 return -EFSCORRUPTED; 4475 } 4476 4477 if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] > 4478 valid_user_blocks(sbi)) { 4479 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u", 4480 sit_valid_blocks[DATA], sit_valid_blocks[NODE], 4481 valid_user_blocks(sbi)); 4482 f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT); 4483 return -EFSCORRUPTED; 4484 } 4485 4486 return 0; 4487 } 4488 4489 static void init_free_segmap(struct f2fs_sb_info *sbi) 4490 { 4491 unsigned int start; 4492 int type; 4493 struct seg_entry *sentry; 4494 4495 for (start = 0; start < MAIN_SEGS(sbi); start++) { 4496 if (f2fs_usable_blks_in_seg(sbi, start) == 0) 4497 continue; 4498 sentry = get_seg_entry(sbi, start); 4499 if (!sentry->valid_blocks) 4500 __set_free(sbi, start); 4501 else 4502 SIT_I(sbi)->written_valid_blocks += 4503 sentry->valid_blocks; 4504 } 4505 4506 /* set use the current segments */ 4507 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { 4508 struct curseg_info *curseg_t = CURSEG_I(sbi, type); 4509 4510 __set_test_and_inuse(sbi, curseg_t->segno); 4511 } 4512 } 4513 4514 static void init_dirty_segmap(struct f2fs_sb_info *sbi) 4515 { 4516 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 4517 struct free_segmap_info *free_i = FREE_I(sbi); 4518 unsigned int segno = 0, offset = 0, secno; 4519 block_t valid_blocks, usable_blks_in_seg; 4520 4521 while (1) { 
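		/*
		 * A segment counts as dirty only when part of its usable
		 * blocks is valid; completely valid and completely empty
		 * segments are skipped below.
		 */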
4522 /* find dirty segment based on free segmap */ 4523 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); 4524 if (segno >= MAIN_SEGS(sbi)) 4525 break; 4526 offset = segno + 1; 4527 valid_blocks = get_valid_blocks(sbi, segno, false); 4528 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno); 4529 if (valid_blocks == usable_blks_in_seg || !valid_blocks) 4530 continue; 4531 if (valid_blocks > usable_blks_in_seg) { 4532 f2fs_bug_on(sbi, 1); 4533 continue; 4534 } 4535 mutex_lock(&dirty_i->seglist_lock); 4536 __locate_dirty_segment(sbi, segno, DIRTY); 4537 mutex_unlock(&dirty_i->seglist_lock); 4538 } 4539 4540 if (!__is_large_section(sbi)) 4541 return; 4542 4543 mutex_lock(&dirty_i->seglist_lock); 4544 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 4545 valid_blocks = get_valid_blocks(sbi, segno, true); 4546 secno = GET_SEC_FROM_SEG(sbi, segno); 4547 4548 if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi)) 4549 continue; 4550 if (IS_CURSEC(sbi, secno)) 4551 continue; 4552 set_bit(secno, dirty_i->dirty_secmap); 4553 } 4554 mutex_unlock(&dirty_i->seglist_lock); 4555 } 4556 4557 static int init_victim_secmap(struct f2fs_sb_info *sbi) 4558 { 4559 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 4560 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4561 4562 dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); 4563 if (!dirty_i->victim_secmap) 4564 return -ENOMEM; 4565 4566 dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); 4567 if (!dirty_i->pinned_secmap) 4568 return -ENOMEM; 4569 4570 dirty_i->pinned_secmap_cnt = 0; 4571 dirty_i->enable_pin_section = true; 4572 return 0; 4573 } 4574 4575 static int build_dirty_segmap(struct f2fs_sb_info *sbi) 4576 { 4577 struct dirty_seglist_info *dirty_i; 4578 unsigned int bitmap_size, i; 4579 4580 /* allocate memory for dirty segments list information */ 4581 dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info), 4582 GFP_KERNEL); 4583 if (!dirty_i) 4584 return -ENOMEM; 4585 4586 SM_I(sbi)->dirty_info = dirty_i; 4587 mutex_init(&dirty_i->seglist_lock); 4588 4589 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 4590 4591 for (i = 0; i < NR_DIRTY_TYPE; i++) { 4592 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size, 4593 GFP_KERNEL); 4594 if (!dirty_i->dirty_segmap[i]) 4595 return -ENOMEM; 4596 } 4597 4598 if (__is_large_section(sbi)) { 4599 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4600 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi, 4601 bitmap_size, GFP_KERNEL); 4602 if (!dirty_i->dirty_secmap) 4603 return -ENOMEM; 4604 } 4605 4606 init_dirty_segmap(sbi); 4607 return init_victim_secmap(sbi); 4608 } 4609 4610 static int sanity_check_curseg(struct f2fs_sb_info *sbi) 4611 { 4612 int i; 4613 4614 /* 4615 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr; 4616 * In LFS curseg, all blkaddr after .next_blkoff should be unused. 
 */
	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
		unsigned int blkofs = curseg->next_blkoff;

		if (f2fs_sb_has_readonly(sbi) &&
			i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
			continue;

		sanity_check_seg_type(sbi, curseg->seg_type);

		if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
			f2fs_err(sbi,
				 "Current segment has invalid alloc_type:%d",
				 curseg->alloc_type);
			f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
			return -EFSCORRUPTED;
		}

		if (f2fs_test_bit(blkofs, se->cur_valid_map))
			goto out;

		if (curseg->alloc_type == SSR)
			continue;

		for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
			if (!f2fs_test_bit(blkofs, se->cur_valid_map))
				continue;
out:
			f2fs_err(sbi,
				 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
				 i, curseg->segno, curseg->alloc_type,
				 curseg->next_blkoff, blkofs);
			f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
			return -EFSCORRUPTED;
		}
	}
	return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED

static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
				    struct f2fs_dev_info *fdev,
				    struct blk_zone *zone)
{
	unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
	block_t zone_block, wp_block, last_valid_block;
	unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
	int i, s, b, ret;
	struct seg_entry *se;

	if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
		return 0;

	wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
	wp_segno = GET_SEGNO(sbi, wp_block);
	wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
	zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
	zone_segno = GET_SEGNO(sbi, zone_block);
	zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);

	if (zone_segno >= MAIN_SEGS(sbi))
		return 0;

	/*
	 * Skip check of zones cursegs point to, since
	 * fix_curseg_write_pointer() checks them.
	 */
	for (i = 0; i < NO_CHECK_TYPE; i++)
		if (zone_secno == GET_SEC_FROM_SEG(sbi,
						   CURSEG_I(sbi, i)->segno))
			return 0;

	/*
	 * Get last valid block of the zone.
	 */
	last_valid_block = zone_block - 1;
	for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
		segno = zone_segno + s;
		se = get_seg_entry(sbi, segno);
		for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
			if (f2fs_test_bit(b, se->cur_valid_map)) {
				last_valid_block = START_BLOCK(sbi, segno) + b;
				break;
			}
		if (last_valid_block >= zone_block)
			break;
	}

	/*
	 * If the last valid block is beyond the write pointer, report the
	 * inconsistency. This inconsistency does not cause a write error
	 * because the zone will not be selected for write operations until
	 * it gets discarded. Just report it.
	 */
	if (last_valid_block >= wp_block) {
		f2fs_notice(sbi, "Valid block beyond write pointer: "
			    "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
			    GET_SEGNO(sbi, last_valid_block),
			    GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
			    wp_segno, wp_blkoff);
		return 0;
	}

	/*
	 * If there is no valid block in the zone and the write pointer is
	 * not at the zone start, reset the write pointer.
4726 */ 4727 if (last_valid_block + 1 == zone_block && zone->wp != zone->start) { 4728 f2fs_notice(sbi, 4729 "Zone without valid block has non-zero write " 4730 "pointer. Reset the write pointer: wp[0x%x,0x%x]", 4731 wp_segno, wp_blkoff); 4732 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, 4733 zone->len >> log_sectors_per_block); 4734 if (ret) { 4735 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", 4736 fdev->path, ret); 4737 return ret; 4738 } 4739 } 4740 4741 return 0; 4742 } 4743 4744 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi, 4745 block_t zone_blkaddr) 4746 { 4747 int i; 4748 4749 for (i = 0; i < sbi->s_ndevs; i++) { 4750 if (!bdev_is_zoned(FDEV(i).bdev)) 4751 continue; 4752 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr && 4753 zone_blkaddr <= FDEV(i).end_blk)) 4754 return &FDEV(i); 4755 } 4756 4757 return NULL; 4758 } 4759 4760 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, 4761 void *data) 4762 { 4763 memcpy(data, zone, sizeof(struct blk_zone)); 4764 return 0; 4765 } 4766 4767 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) 4768 { 4769 struct curseg_info *cs = CURSEG_I(sbi, type); 4770 struct f2fs_dev_info *zbd; 4771 struct blk_zone zone; 4772 unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; 4773 block_t cs_zone_block, wp_block; 4774 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; 4775 sector_t zone_sector; 4776 int err; 4777 4778 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); 4779 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); 4780 4781 zbd = get_target_zoned_dev(sbi, cs_zone_block); 4782 if (!zbd) 4783 return 0; 4784 4785 /* report zone for the sector the curseg points to */ 4786 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) 4787 << log_sectors_per_block; 4788 err = blkdev_report_zones(zbd->bdev, zone_sector, 1, 4789 report_one_zone_cb, &zone); 4790 if (err != 1) { 4791 f2fs_err(sbi, "Report zone failed: %s errno=(%d)", 4792 zbd->path, err); 4793 return err; 4794 } 4795 4796 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) 4797 return 0; 4798 4799 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); 4800 wp_segno = GET_SEGNO(sbi, wp_block); 4801 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); 4802 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); 4803 4804 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && 4805 wp_sector_off == 0) 4806 return 0; 4807 4808 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: " 4809 "curseg[0x%x,0x%x] wp[0x%x,0x%x]", 4810 type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); 4811 4812 f2fs_notice(sbi, "Assign new section to curseg[%d]: " 4813 "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); 4814 4815 f2fs_allocate_new_section(sbi, type, true); 4816 4817 /* check consistency of the zone curseg pointed to */ 4818 if (check_zone_write_pointer(sbi, zbd, &zone)) 4819 return -EIO; 4820 4821 /* check newly assigned zone */ 4822 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); 4823 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); 4824 4825 zbd = get_target_zoned_dev(sbi, cs_zone_block); 4826 if (!zbd) 4827 return 0; 4828 4829 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) 4830 << log_sectors_per_block; 4831 err = blkdev_report_zones(zbd->bdev, zone_sector, 1, 4832 report_one_zone_cb, &zone); 4833 if (err != 1) { 4834 f2fs_err(sbi, "Report zone failed: %s errno=(%d)", 4835 zbd->path, err); 4836 return 
err; 4837 } 4838 4839 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) 4840 return 0; 4841 4842 if (zone.wp != zone.start) { 4843 f2fs_notice(sbi, 4844 "New zone for curseg[%d] is not yet discarded. " 4845 "Reset the zone: curseg[0x%x,0x%x]", 4846 type, cs->segno, cs->next_blkoff); 4847 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, 4848 zone_sector >> log_sectors_per_block, 4849 zone.len >> log_sectors_per_block); 4850 if (err) { 4851 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", 4852 zbd->path, err); 4853 return err; 4854 } 4855 } 4856 4857 return 0; 4858 } 4859 4860 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) 4861 { 4862 int i, ret; 4863 4864 for (i = 0; i < NR_PERSISTENT_LOG; i++) { 4865 ret = fix_curseg_write_pointer(sbi, i); 4866 if (ret) 4867 return ret; 4868 } 4869 4870 return 0; 4871 } 4872 4873 struct check_zone_write_pointer_args { 4874 struct f2fs_sb_info *sbi; 4875 struct f2fs_dev_info *fdev; 4876 }; 4877 4878 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, 4879 void *data) 4880 { 4881 struct check_zone_write_pointer_args *args; 4882 4883 args = (struct check_zone_write_pointer_args *)data; 4884 4885 return check_zone_write_pointer(args->sbi, args->fdev, zone); 4886 } 4887 4888 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) 4889 { 4890 int i, ret; 4891 struct check_zone_write_pointer_args args; 4892 4893 for (i = 0; i < sbi->s_ndevs; i++) { 4894 if (!bdev_is_zoned(FDEV(i).bdev)) 4895 continue; 4896 4897 args.sbi = sbi; 4898 args.fdev = &FDEV(i); 4899 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES, 4900 check_zone_write_pointer_cb, &args); 4901 if (ret < 0) 4902 return ret; 4903 } 4904 4905 return 0; 4906 } 4907 4908 static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx, 4909 unsigned int dev_idx) 4910 { 4911 if (!bdev_is_zoned(FDEV(dev_idx).bdev)) 4912 return true; 4913 return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq); 4914 } 4915 4916 /* Return the zone index in the given device */ 4917 static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno, 4918 int dev_idx) 4919 { 4920 block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); 4921 4922 return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >> 4923 sbi->log_blocks_per_blkz; 4924 } 4925 4926 /* 4927 * Return the usable segments in a section based on the zone's 4928 * corresponding zone capacity. Zone is equal to a section. 4929 */ 4930 static inline unsigned int f2fs_usable_zone_segs_in_sec( 4931 struct f2fs_sb_info *sbi, unsigned int segno) 4932 { 4933 unsigned int dev_idx, zone_idx; 4934 4935 dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno)); 4936 zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx); 4937 4938 /* Conventional zone's capacity is always equal to zone size */ 4939 if (is_conv_zone(sbi, zone_idx, dev_idx)) 4940 return sbi->segs_per_sec; 4941 4942 if (!sbi->unusable_blocks_per_sec) 4943 return sbi->segs_per_sec; 4944 4945 /* Get the segment count beyond zone capacity block */ 4946 return sbi->segs_per_sec - (sbi->unusable_blocks_per_sec >> 4947 sbi->log_blocks_per_seg); 4948 } 4949 4950 /* 4951 * Return the number of usable blocks in a segment. The number of blocks 4952 * returned is always equal to the number of blocks in a segment for 4953 * segments fully contained within a sequential zone capacity or a 4954 * conventional zone. 
For segments partially contained in a sequential 4955 * zone capacity, the number of usable blocks up to the zone capacity 4956 * is returned. 0 is returned in all other cases. 4957 */ 4958 static inline unsigned int f2fs_usable_zone_blks_in_seg( 4959 struct f2fs_sb_info *sbi, unsigned int segno) 4960 { 4961 block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr; 4962 unsigned int zone_idx, dev_idx, secno; 4963 4964 secno = GET_SEC_FROM_SEG(sbi, segno); 4965 seg_start = START_BLOCK(sbi, segno); 4966 dev_idx = f2fs_target_device_index(sbi, seg_start); 4967 zone_idx = get_zone_idx(sbi, secno, dev_idx); 4968 4969 /* 4970 * Conventional zone's capacity is always equal to zone size, 4971 * so, blocks per segment is unchanged. 4972 */ 4973 if (is_conv_zone(sbi, zone_idx, dev_idx)) 4974 return sbi->blocks_per_seg; 4975 4976 if (!sbi->unusable_blocks_per_sec) 4977 return sbi->blocks_per_seg; 4978 4979 sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); 4980 sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi); 4981 4982 /* 4983 * If segment starts before zone capacity and spans beyond 4984 * zone capacity, then usable blocks are from seg start to 4985 * zone capacity. If the segment starts after the zone capacity, 4986 * then there are no usable blocks. 4987 */ 4988 if (seg_start >= sec_cap_blkaddr) 4989 return 0; 4990 if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr) 4991 return sec_cap_blkaddr - seg_start; 4992 4993 return sbi->blocks_per_seg; 4994 } 4995 #else 4996 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) 4997 { 4998 return 0; 4999 } 5000 5001 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) 5002 { 5003 return 0; 5004 } 5005 5006 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi, 5007 unsigned int segno) 5008 { 5009 return 0; 5010 } 5011 5012 static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi, 5013 unsigned int segno) 5014 { 5015 return 0; 5016 } 5017 #endif 5018 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, 5019 unsigned int segno) 5020 { 5021 if (f2fs_sb_has_blkzoned(sbi)) 5022 return f2fs_usable_zone_blks_in_seg(sbi, segno); 5023 5024 return sbi->blocks_per_seg; 5025 } 5026 5027 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, 5028 unsigned int segno) 5029 { 5030 if (f2fs_sb_has_blkzoned(sbi)) 5031 return f2fs_usable_zone_segs_in_sec(sbi, segno); 5032 5033 return sbi->segs_per_sec; 5034 } 5035 5036 /* 5037 * Update min, max modified time for cost-benefit GC algorithm 5038 */ 5039 static void init_min_max_mtime(struct f2fs_sb_info *sbi) 5040 { 5041 struct sit_info *sit_i = SIT_I(sbi); 5042 unsigned int segno; 5043 5044 down_write(&sit_i->sentry_lock); 5045 5046 sit_i->min_mtime = ULLONG_MAX; 5047 5048 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 5049 unsigned int i; 5050 unsigned long long mtime = 0; 5051 5052 for (i = 0; i < sbi->segs_per_sec; i++) 5053 mtime += get_seg_entry(sbi, segno + i)->mtime; 5054 5055 mtime = div_u64(mtime, sbi->segs_per_sec); 5056 5057 if (sit_i->min_mtime > mtime) 5058 sit_i->min_mtime = mtime; 5059 } 5060 sit_i->max_mtime = get_mtime(sbi, false); 5061 sit_i->dirty_max_mtime = 0; 5062 up_write(&sit_i->sentry_lock); 5063 } 5064 5065 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) 5066 { 5067 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 5068 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 5069 struct f2fs_sm_info *sm_info; 5070 int err; 5071 5072 sm_info = f2fs_kzalloc(sbi, 
sizeof(struct f2fs_sm_info), GFP_KERNEL); 5073 if (!sm_info) 5074 return -ENOMEM; 5075 5076 /* init sm info */ 5077 sbi->sm_info = sm_info; 5078 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 5079 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); 5080 sm_info->segment_count = le32_to_cpu(raw_super->segment_count); 5081 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); 5082 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 5083 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 5084 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 5085 sm_info->rec_prefree_segments = sm_info->main_segments * 5086 DEF_RECLAIM_PREFREE_SEGMENTS / 100; 5087 if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) 5088 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; 5089 5090 if (!f2fs_lfs_mode(sbi)) 5091 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; 5092 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 5093 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 5094 sm_info->min_seq_blocks = sbi->blocks_per_seg; 5095 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; 5096 sm_info->min_ssr_sections = reserved_sections(sbi); 5097 5098 INIT_LIST_HEAD(&sm_info->sit_entry_set); 5099 5100 init_f2fs_rwsem(&sm_info->curseg_lock); 5101 5102 if (!f2fs_readonly(sbi->sb)) { 5103 err = f2fs_create_flush_cmd_control(sbi); 5104 if (err) 5105 return err; 5106 } 5107 5108 err = create_discard_cmd_control(sbi); 5109 if (err) 5110 return err; 5111 5112 err = build_sit_info(sbi); 5113 if (err) 5114 return err; 5115 err = build_free_segmap(sbi); 5116 if (err) 5117 return err; 5118 err = build_curseg(sbi); 5119 if (err) 5120 return err; 5121 5122 /* reinit free segmap based on SIT */ 5123 err = build_sit_entries(sbi); 5124 if (err) 5125 return err; 5126 5127 init_free_segmap(sbi); 5128 err = build_dirty_segmap(sbi); 5129 if (err) 5130 return err; 5131 5132 err = sanity_check_curseg(sbi); 5133 if (err) 5134 return err; 5135 5136 init_min_max_mtime(sbi); 5137 return 0; 5138 } 5139 5140 static void discard_dirty_segmap(struct f2fs_sb_info *sbi, 5141 enum dirty_type dirty_type) 5142 { 5143 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5144 5145 mutex_lock(&dirty_i->seglist_lock); 5146 kvfree(dirty_i->dirty_segmap[dirty_type]); 5147 dirty_i->nr_dirty[dirty_type] = 0; 5148 mutex_unlock(&dirty_i->seglist_lock); 5149 } 5150 5151 static void destroy_victim_secmap(struct f2fs_sb_info *sbi) 5152 { 5153 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5154 5155 kvfree(dirty_i->pinned_secmap); 5156 kvfree(dirty_i->victim_secmap); 5157 } 5158 5159 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) 5160 { 5161 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5162 int i; 5163 5164 if (!dirty_i) 5165 return; 5166 5167 /* discard pre-free/dirty segments list */ 5168 for (i = 0; i < NR_DIRTY_TYPE; i++) 5169 discard_dirty_segmap(sbi, i); 5170 5171 if (__is_large_section(sbi)) { 5172 mutex_lock(&dirty_i->seglist_lock); 5173 kvfree(dirty_i->dirty_secmap); 5174 mutex_unlock(&dirty_i->seglist_lock); 5175 } 5176 5177 destroy_victim_secmap(sbi); 5178 SM_I(sbi)->dirty_info = NULL; 5179 kfree(dirty_i); 5180 } 5181 5182 static void destroy_curseg(struct f2fs_sb_info *sbi) 5183 { 5184 struct curseg_info *array = SM_I(sbi)->curseg_array; 5185 int i; 5186 5187 if (!array) 5188 return; 5189 SM_I(sbi)->curseg_array = NULL; 5190 for (i = 0; i < NR_CURSEG_TYPE; i++) { 5191 kfree(array[i].sum_blk); 5192 kfree(array[i].journal); 5193 } 5194 
kfree(array); 5195 } 5196 5197 static void destroy_free_segmap(struct f2fs_sb_info *sbi) 5198 { 5199 struct free_segmap_info *free_i = SM_I(sbi)->free_info; 5200 5201 if (!free_i) 5202 return; 5203 SM_I(sbi)->free_info = NULL; 5204 kvfree(free_i->free_segmap); 5205 kvfree(free_i->free_secmap); 5206 kfree(free_i); 5207 } 5208 5209 static void destroy_sit_info(struct f2fs_sb_info *sbi) 5210 { 5211 struct sit_info *sit_i = SIT_I(sbi); 5212 5213 if (!sit_i) 5214 return; 5215 5216 if (sit_i->sentries) 5217 kvfree(sit_i->bitmap); 5218 kfree(sit_i->tmp_map); 5219 5220 kvfree(sit_i->sentries); 5221 kvfree(sit_i->sec_entries); 5222 kvfree(sit_i->dirty_sentries_bitmap); 5223 5224 SM_I(sbi)->sit_info = NULL; 5225 kvfree(sit_i->sit_bitmap); 5226 #ifdef CONFIG_F2FS_CHECK_FS 5227 kvfree(sit_i->sit_bitmap_mir); 5228 kvfree(sit_i->invalid_segmap); 5229 #endif 5230 kfree(sit_i); 5231 } 5232 5233 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) 5234 { 5235 struct f2fs_sm_info *sm_info = SM_I(sbi); 5236 5237 if (!sm_info) 5238 return; 5239 f2fs_destroy_flush_cmd_control(sbi, true); 5240 destroy_discard_cmd_control(sbi); 5241 destroy_dirty_segmap(sbi); 5242 destroy_curseg(sbi); 5243 destroy_free_segmap(sbi); 5244 destroy_sit_info(sbi); 5245 sbi->sm_info = NULL; 5246 kfree(sm_info); 5247 } 5248 5249 int __init f2fs_create_segment_manager_caches(void) 5250 { 5251 discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry", 5252 sizeof(struct discard_entry)); 5253 if (!discard_entry_slab) 5254 goto fail; 5255 5256 discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd", 5257 sizeof(struct discard_cmd)); 5258 if (!discard_cmd_slab) 5259 goto destroy_discard_entry; 5260 5261 sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set", 5262 sizeof(struct sit_entry_set)); 5263 if (!sit_entry_set_slab) 5264 goto destroy_discard_cmd; 5265 5266 revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry", 5267 sizeof(struct revoke_entry)); 5268 if (!revoke_entry_slab) 5269 goto destroy_sit_entry_set; 5270 return 0; 5271 5272 destroy_sit_entry_set: 5273 kmem_cache_destroy(sit_entry_set_slab); 5274 destroy_discard_cmd: 5275 kmem_cache_destroy(discard_cmd_slab); 5276 destroy_discard_entry: 5277 kmem_cache_destroy(discard_entry_slab); 5278 fail: 5279 return -ENOMEM; 5280 } 5281 5282 void f2fs_destroy_segment_manager_caches(void) 5283 { 5284 kmem_cache_destroy(sit_entry_set_slab); 5285 kmem_cache_destroy(discard_cmd_slab); 5286 kmem_cache_destroy(discard_entry_slab); 5287 kmem_cache_destroy(revoke_entry_slab); 5288 } 5289