// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of unsigned long.
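 *
 * Illustrative usage (assuming a zeroed bitmap): after only
 * f2fs_set_bit(2, bitmap), __find_rev_next_bit(bitmap, 8, 0) returns 2 and
 * __find_rev_next_zero_bit(bitmap, 8, 2) returns 3, following the per-byte
 * bit ordering shown in the example below.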
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (f2fs_lfs_mode(sbi))
		return false;
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
	struct inmem_pages *new;

	set_page_private_atomic(page);

	new = f2fs_kmem_cache_alloc(inmem_entry_slab,
					GFP_NOFS, true, NULL);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
	get_page(page);
	mutex_lock(&F2FS_I(inode)->inmem_lock);
	list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	mutex_unlock(&F2FS_I(inode)->inmem_lock);

	trace_f2fs_register_inmem_page(page, INMEM);
}

static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover,
				bool trylock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inmem_pages *cur, *tmp;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, head, list) {
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		if (trylock) {
			/*
			 * to avoid deadlock in between page lock and
			 * inmem_lock.
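			 * (f2fs_drop_inmem_pages() below holds inmem_lock and
			 * calls this path with trylock set, so pages that
			 * cannot be locked are simply skipped and retried on
			 * the next pass of its loop.)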
			 */
			if (!trylock_page(page))
				continue;
		} else {
			lock_page(page);
		}

		f2fs_wait_on_page_writeback(page, DATA, true, true);

		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
			set_new_dnode(&dn, inode, NULL, NULL, 0);
			err = f2fs_get_dnode_of_data(&dn, page->index,
								LOOKUP_NODE);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC,
							DEFAULT_IO_TIMEOUT);
					cond_resched();
					goto retry;
				}
				err = -EAGAIN;
				goto next;
			}

			err = f2fs_get_node_info(sbi, dn.nid, &ni);
			if (err) {
				f2fs_put_dnode(&dn);
				return err;
			}

			if (cur->old_addr == NEW_ADDR) {
				f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
				f2fs_update_data_blkaddr(&dn, NEW_ADDR);
			} else
				f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
		/* we don't need to invalidate this in the successful status */
		if (drop || recover) {
			ClearPageUptodate(page);
			clear_page_private_gcing(page);
		}
		detach_page_private(page);
		set_page_private(page, 0);
		f2fs_put_page(page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	return err;
}

void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
{
	struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
	struct inode *inode;
	struct f2fs_inode_info *fi;
	unsigned int count = sbi->atomic_files;
	unsigned int looped = 0;
next:
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
		return;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
	inode = igrab(&fi->vfs_inode);
	if (inode)
		list_move_tail(&fi->inmem_ilist, head);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

	if (inode) {
		if (gc_failure) {
			if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
				goto skip;
		}
		set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
		f2fs_drop_inmem_pages(inode);
skip:
		iput(inode);
	}
	congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
	cond_resched();
	if (gc_failure) {
		if (++looped >= count)
			return;
	}
	goto next;
}

void f2fs_drop_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);

	do {
		mutex_lock(&fi->inmem_lock);
		if (list_empty(&fi->inmem_pages)) {
			fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;

			spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
			if (!list_empty(&fi->inmem_ilist))
				list_del_init(&fi->inmem_ilist);
			if (f2fs_is_atomic_file(inode)) {
				clear_inode_flag(inode, FI_ATOMIC_FILE);
				sbi->atomic_files--;
			}
			spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

			mutex_unlock(&fi->inmem_lock);
			break;
		}
		__revoke_inmem_pages(inode, &fi->inmem_pages,
						true, false, true);
		mutex_unlock(&fi->inmem_lock);
	} while (1);
}

void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct list_head *head = &fi->inmem_pages;
	struct inmem_pages *cur = NULL;

	f2fs_bug_on(sbi, !page_private_atomic(page));

	mutex_lock(&fi->inmem_lock);
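	/* find the entry registered for this page by f2fs_register_inmem_page() */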
	list_for_each_entry(cur, head, list) {
		if (cur->page == page)
			break;
	}

	f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
	list_del(&cur->list);
	mutex_unlock(&fi->inmem_lock);

	dec_page_count(sbi, F2FS_INMEM_PAGES);
	kmem_cache_free(inmem_entry_slab, cur);

	ClearPageUptodate(page);
	clear_page_private_atomic(page);
	f2fs_put_page(page, 0);

	detach_page_private(page);
	set_page_private(page, 0);

	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

static int __f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_PRIO,
		.io_type = FS_DATA_IO,
	};
	struct list_head revoke_list;
	bool submit_bio = false;
	int err = 0;

	INIT_LIST_HEAD(&revoke_list);

	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		struct page *page = cur->page;

		lock_page(page);
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			f2fs_wait_on_page_writeback(page, DATA, true, true);

			set_page_dirty(page);
			if (clear_page_dirty_for_io(page)) {
				inode_dec_dirty_pages(inode);
				f2fs_remove_dirty_inode(inode);
			}
retry:
			fio.page = page;
			fio.old_blkaddr = NULL_ADDR;
			fio.encrypted_page = NULL;
			fio.need_lock = LOCK_DONE;
			err = f2fs_do_write_data_page(&fio);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC,
							DEFAULT_IO_TIMEOUT);
					cond_resched();
					goto retry;
				}
				unlock_page(page);
				break;
			}
			/* record old blkaddr for revoking */
			cur->old_addr = fio.old_blkaddr;
			submit_bio = true;
		}
		unlock_page(page);
		list_move_tail(&cur->list, &revoke_list);
	}

	if (submit_bio)
		f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);

	if (err) {
		/*
		 * Try to revoke all committed pages. We could still fail due
		 * to lack of memory or some other reason; in that case EAGAIN
		 * is returned, which means the transaction is no longer
		 * consistent and the caller should use the journal to recover
		 * or rewrite & commit the last transaction. For other error
		 * numbers, the revoking was done by the filesystem itself.
		 */
		err = __revoke_inmem_pages(inode, &revoke_list,
						false, true, false);

		/* drop all uncommitted pages */
		__revoke_inmem_pages(inode, &fi->inmem_pages,
						true, false, false);
	} else {
		__revoke_inmem_pages(inode, &revoke_list,
						false, false, false);
	}

	return err;
}

int f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	f2fs_balance_fs(sbi, true);

	down_write(&fi->i_gc_rwsem[WRITE]);

	f2fs_lock_op(sbi);
	set_inode_flag(inode, FI_ATOMIC_COMMIT);

	mutex_lock(&fi->inmem_lock);
	err = __f2fs_commit_inmem_pages(inode);
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

	f2fs_unlock_op(sbi);
	up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
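 * (Roughly: when free sections run low this either wakes the background GC
 * thread and waits for it, or runs foreground GC directly; background
 * flushing of nodes, NATs and extents is left to f2fs_balance_fs_bg().)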
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
		f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
		f2fs_stop_checkpoint(sbi, false);
	}

	/* balance_fs_bg is able to be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi, false);

	if (!f2fs_is_checkpoint_ready(sbi))
		return;

	/*
	 * We should do GC or end up with checkpoint, if there are too many
	 * dirty dir/node pages without enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0, 0)) {
		if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
					sbi->gc_thread->f2fs_gc_task) {
			DEFINE_WAIT(wait);

			prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
						TASK_UNINTERRUPTIBLE);
			wake_up(&sbi->gc_thread->gc_wait_queue_head);
			io_schedule();
			finish_wait(&sbi->gc_thread->fggc_wq, &wait);
		} else {
			down_write(&sbi->gc_lock);
			f2fs_gc(sbi, false, false, false, NULL_SEGNO);
		}
	}
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
		excess_prefree_segs(sbi))
		goto do_sync;

	/* there is background inflight IO or foreground operation recently */
	if (is_inflight_io(sbi, REQ_TIME) ||
		(!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem)))
		return;

	/* exceed periodical checkpoint timeout threshold */
	if (f2fs_time_over(sbi, CP_TIME))
		goto do_sync;

	/* checkpoint is the only way to shrink partial cached entries */
	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
		f2fs_available_free_memory(sbi, INO_ENTRIES))
		return;

do_sync:
	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
		struct blk_plug plug;

		mutex_lock(&sbi->flush_lock);

		blk_start_plug(&plug);
		f2fs_sync_dirty_inodes(sbi, FILE_INODE);
		blk_finish_plug(&plug);

		mutex_unlock(&sbi->flush_lock);
	}
	f2fs_sync_fs(sbi->sb, true);
	stat_inc_bg_cp_count(sbi->stat_info);
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	int ret = blkdev_issue_flush(bdev);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!f2fs_is_multi_device(sbi))
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;

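	/*
	 * Flush merging (illustrative summary): f2fs_issue_flush() queues a
	 * flush_cmd on fcc->issue_list and sleeps; this thread drains the
	 * list, issues one flush on behalf of all waiters, and completes
	 * each of them with the shared result.
	 */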
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
			f2fs_is_multi_device(sbi)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/*
	 * update issue_list before we wake up issue_flush thread, this
	 * smp_mb() pairs with another barrier in ___wait_event(), see
	 * more details in comments of waitqueue_active().
	 */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return err;
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->queued_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	if (!test_opt(sbi, FLUSH_MERGE))
		return err;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
		return err;
	}

	return err;
}

void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		int count = DEFAULT_RETRY_IO_COUNT;

		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;

		do {
			ret = __submit_flush_wait(sbi, FDEV(i).bdev);
			if (ret)
				congestion_wait(BLK_RW_ASYNC,
						DEFAULT_IO_TIMEOUT);
		} while (ret && --count);

		if (ret) {
			f2fs_stop_checkpoint(sbi, false);
			break;
		}

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;

		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
			block_t valid_blocks =
				get_valid_blocks(sbi, segno, true);

			f2fs_bug_on(sbi, unlikely(!valid_blocks ||
					valid_blocks == BLKS_PER_SEC(sbi)));

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t valid_blocks;

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		valid_blocks = get_valid_blocks(sbi, segno, true);
		if (valid_blocks == 0) {
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
		}
		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);

			if (!valid_blocks ||
					valid_blocks == BLKS_PER_SEC(sbi)) {
				clear_bit(secno, dirty_i->dirty_secmap);
				return;
			}

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

/*
 * Errors such as -ENOMEM should not occur here.
 * Adding a dirty entry into the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
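 * (Roughly: locate_dirty_segment() marks a segment PRE once no valid blocks
 * remain, keeps it in DIRTY while it is partially valid, and drops it from
 * DIRTY otherwise, all under seglist_lock.)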
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;
	unsigned int usable_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);

	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
		ckpt_valid_blocks == usable_blocks)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < usable_blocks) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

/* This moves currently empty dirty segments to prefree; it takes seglist_lock itself. */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}

block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	block_t unusable;
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
		else
			holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	unusable = holes[DATA] > holes[NODE] ?
					holes[DATA] : holes[NODE];
	if (unusable > ovp_holes)
		return unusable - ovp_holes;
	return 0;
}

int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	if (unusable > F2FS_OPTION(sbi).unusable_cap)
		return -EAGAIN;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
		dirty_segments(sbi) > ovp_hole_segs)
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno, false))
			continue;
		mutex_unlock(&dirty_i->seglist_lock);
		return segno;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return NULL_SEGNO;
}

static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->lstart = lstart;
	dc->start = start;
	dc->len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->queued = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}

static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);

	return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_sub(dc->queued, &dcc->queued_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		printk_ratelimited(
			"%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
failed, ret: %d", 1081 KERN_INFO, sbi->sb->s_id, 1082 dc->lstart, dc->start, dc->len, dc->error); 1083 __detach_discard_cmd(dcc, dc); 1084 } 1085 1086 static void f2fs_submit_discard_endio(struct bio *bio) 1087 { 1088 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; 1089 unsigned long flags; 1090 1091 spin_lock_irqsave(&dc->lock, flags); 1092 if (!dc->error) 1093 dc->error = blk_status_to_errno(bio->bi_status); 1094 dc->bio_ref--; 1095 if (!dc->bio_ref && dc->state == D_SUBMIT) { 1096 dc->state = D_DONE; 1097 complete_all(&dc->wait); 1098 } 1099 spin_unlock_irqrestore(&dc->lock, flags); 1100 bio_put(bio); 1101 } 1102 1103 static void __check_sit_bitmap(struct f2fs_sb_info *sbi, 1104 block_t start, block_t end) 1105 { 1106 #ifdef CONFIG_F2FS_CHECK_FS 1107 struct seg_entry *sentry; 1108 unsigned int segno; 1109 block_t blk = start; 1110 unsigned long offset, size, max_blocks = sbi->blocks_per_seg; 1111 unsigned long *map; 1112 1113 while (blk < end) { 1114 segno = GET_SEGNO(sbi, blk); 1115 sentry = get_seg_entry(sbi, segno); 1116 offset = GET_BLKOFF_FROM_SEG0(sbi, blk); 1117 1118 if (end < START_BLOCK(sbi, segno + 1)) 1119 size = GET_BLKOFF_FROM_SEG0(sbi, end); 1120 else 1121 size = max_blocks; 1122 map = (unsigned long *)(sentry->cur_valid_map); 1123 offset = __find_rev_next_bit(map, size, offset); 1124 f2fs_bug_on(sbi, offset != size); 1125 blk = START_BLOCK(sbi, segno + 1); 1126 } 1127 #endif 1128 } 1129 1130 static void __init_discard_policy(struct f2fs_sb_info *sbi, 1131 struct discard_policy *dpolicy, 1132 int discard_type, unsigned int granularity) 1133 { 1134 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1135 1136 /* common policy */ 1137 dpolicy->type = discard_type; 1138 dpolicy->sync = true; 1139 dpolicy->ordered = false; 1140 dpolicy->granularity = granularity; 1141 1142 dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; 1143 dpolicy->io_aware_gran = MAX_PLIST_NUM; 1144 dpolicy->timeout = false; 1145 1146 if (discard_type == DPOLICY_BG) { 1147 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; 1148 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; 1149 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; 1150 dpolicy->io_aware = true; 1151 dpolicy->sync = false; 1152 dpolicy->ordered = true; 1153 if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { 1154 dpolicy->granularity = 1; 1155 if (atomic_read(&dcc->discard_cmd_cnt)) 1156 dpolicy->max_interval = 1157 DEF_MIN_DISCARD_ISSUE_TIME; 1158 } 1159 } else if (discard_type == DPOLICY_FORCE) { 1160 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; 1161 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; 1162 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; 1163 dpolicy->io_aware = false; 1164 } else if (discard_type == DPOLICY_FSTRIM) { 1165 dpolicy->io_aware = false; 1166 } else if (discard_type == DPOLICY_UMOUNT) { 1167 dpolicy->io_aware = false; 1168 /* we need to issue all to keep CP_TRIMMED_FLAG */ 1169 dpolicy->granularity = 1; 1170 dpolicy->timeout = true; 1171 } 1172 } 1173 1174 static void __update_discard_tree_range(struct f2fs_sb_info *sbi, 1175 struct block_device *bdev, block_t lstart, 1176 block_t start, block_t len); 1177 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ 1178 static int __submit_discard_cmd(struct f2fs_sb_info *sbi, 1179 struct discard_policy *dpolicy, 1180 struct discard_cmd *dc, 1181 unsigned int *issued) 1182 { 1183 struct block_device *bdev = dc->bdev; 1184 struct request_queue *q = bdev_get_queue(bdev); 1185 unsigned int max_discard_blocks = 1186 

/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						struct discard_cmd *dc,
						unsigned int *issued)
{
	struct block_device *bdev = dc->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	int flag = dpolicy->sync ? REQ_SYNC : 0;
	block_t lstart, start, len, total_len;
	int err = 0;

	if (dc->state != D_PREP)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return 0;

	trace_f2fs_issue_discard(bdev, dc->start, dc->len);

	lstart = dc->lstart;
	start = dc->start;
	len = dc->len;
	total_len = len;

	dc->len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->len += len;

		if (time_to_inject(sbi, FAULT_DISCARD)) {
			f2fs_show_injection_info(sbi, FAULT_DISCARD);
			err = -EIO;
			goto submit;
		}
		err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, 0, &bio);
submit:
		if (err) {
			spin_lock_irqsave(&dc->lock, flags);
			if (dc->state == D_PARTIAL)
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

			break;
		}

		f2fs_bug_on(sbi, !bio);

		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);

		atomic_inc(&dcc->queued_discard);
		dc->queued++;
		list_move_tail(&dc->list, wait_list);

		/* sanity check on discard range */
		__check_sit_bitmap(sbi, lstart, lstart + len);

		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, FS_DISCARD, 1);

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
	}

	if (!err && len) {
		dcc->undiscard_blks -= len;
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
	}
	return err;
}

static void __insert_discard_tree(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
							lstart, &leftmost);
do_insert:
	__attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
								p, leftmost);
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool modified = false;

	if (dc->state == D_DONE || dc->len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	dcc->undiscard_blks -= di.len;

	if (blkaddr > di.lstart) {
		dc->len = blkaddr - dc->lstart;
		dcc->undiscard_blks += dc->len;
		__relocate_discard_cmd(dcc, dc);
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
		} else {
			dc->lstart++;
			dc->len--;
			dc->start++;
			dcc->undiscard_blks += dc->len;
			__relocate_discard_cmd(dcc, dc);
		}
	}
}

static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
	block_t end = lstart + len;

	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
		}

		if (!merged) {
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
		}
next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

	if (!f2fs_bdev_support_discard(bdev))
		return 0;

	trace_f2fs_queue_discard(bdev, blkstart, blklen);

	if (f2fs_is_multi_device(sbi)) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
	return 0;
}

static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	unsigned int pos = dcc->next_pos;
	unsigned int issued = 0;
	bool io_interrupted = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, pos,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
		int err = 0;

		if (dc->state != D_PREP)
			goto next;

		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->lstart + dc->len;
		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);

		if (issued >= dpolicy->max_requests)
			break;
next:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy);

static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
	int i, issued;
	bool io_interrupted = false;

	if (dpolicy->timeout)
		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);

retry:
	issued = 0;
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
			break;

		if (i + 1 < dpolicy->granularity)
			break;

		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
			return __issue_discard_cmd_orderly(sbi, dpolicy);

		pend_list = &dcc->pend_list[i];

		mutex_lock(&dcc->cmd_lock);
		if (list_empty(pend_list))
			goto next;
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
							&dcc->root, false));
		blk_start_plug(&plug);
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

			if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
				break;

			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
						!is_idle(sbi, DISCARD_TIME)) {
				io_interrupted = true;
				break;
			}

			__submit_discard_cmd(sbi, dpolicy, dc, &issued);

			if (issued >= dpolicy->max_requests)
				break;
		}
		blk_finish_plug(&plug);
next:
		mutex_unlock(&dcc->cmd_lock);

		if (issued >= dpolicy->max_requests || io_interrupted)
			break;
	}

	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
		__wait_all_discard_cmd(sbi, dpolicy);
		goto retry;
	}

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
	bool dropped = false;

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
			dropped = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	return dropped;
}

void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	__drop_discard_cmd(sbi);
}

static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned int len = 0;

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
	if (!dc->ref) {
		if (!dc->error)
			len = dc->len;
		__remove_discard_cmd(sbi, dc);
	}
	mutex_unlock(&dcc->cmd_lock);

	return len;
}

static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						block_t start, block_t end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1653 &(dcc->fstrim_list) : &(dcc->wait_list); 1654 struct discard_cmd *dc, *tmp; 1655 bool need_wait; 1656 unsigned int trimmed = 0; 1657 1658 next: 1659 need_wait = false; 1660 1661 mutex_lock(&dcc->cmd_lock); 1662 list_for_each_entry_safe(dc, tmp, wait_list, list) { 1663 if (dc->lstart + dc->len <= start || end <= dc->lstart) 1664 continue; 1665 if (dc->len < dpolicy->granularity) 1666 continue; 1667 if (dc->state == D_DONE && !dc->ref) { 1668 wait_for_completion_io(&dc->wait); 1669 if (!dc->error) 1670 trimmed += dc->len; 1671 __remove_discard_cmd(sbi, dc); 1672 } else { 1673 dc->ref++; 1674 need_wait = true; 1675 break; 1676 } 1677 } 1678 mutex_unlock(&dcc->cmd_lock); 1679 1680 if (need_wait) { 1681 trimmed += __wait_one_discard_bio(sbi, dc); 1682 goto next; 1683 } 1684 1685 return trimmed; 1686 } 1687 1688 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, 1689 struct discard_policy *dpolicy) 1690 { 1691 struct discard_policy dp; 1692 unsigned int discard_blks; 1693 1694 if (dpolicy) 1695 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); 1696 1697 /* wait all */ 1698 __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1); 1699 discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); 1700 __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1); 1701 discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); 1702 1703 return discard_blks; 1704 } 1705 1706 /* This should be covered by global mutex, &sit_i->sentry_lock */ 1707 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) 1708 { 1709 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1710 struct discard_cmd *dc; 1711 bool need_wait = false; 1712 1713 mutex_lock(&dcc->cmd_lock); 1714 dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root, 1715 NULL, blkaddr); 1716 if (dc) { 1717 if (dc->state == D_PREP) { 1718 __punch_discard_cmd(sbi, dc, blkaddr); 1719 } else { 1720 dc->ref++; 1721 need_wait = true; 1722 } 1723 } 1724 mutex_unlock(&dcc->cmd_lock); 1725 1726 if (need_wait) 1727 __wait_one_discard_bio(sbi, dc); 1728 } 1729 1730 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) 1731 { 1732 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1733 1734 if (dcc && dcc->f2fs_issue_discard) { 1735 struct task_struct *discard_thread = dcc->f2fs_issue_discard; 1736 1737 dcc->f2fs_issue_discard = NULL; 1738 kthread_stop(discard_thread); 1739 } 1740 } 1741 1742 /* This comes from f2fs_put_super */ 1743 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) 1744 { 1745 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1746 struct discard_policy dpolicy; 1747 bool dropped; 1748 1749 __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, 1750 dcc->discard_granularity); 1751 __issue_discard_cmd(sbi, &dpolicy); 1752 dropped = __drop_discard_cmd(sbi); 1753 1754 /* just to make sure there is no pending discard commands */ 1755 __wait_all_discard_cmd(sbi, NULL); 1756 1757 f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt)); 1758 return dropped; 1759 } 1760 1761 static int issue_discard_thread(void *data) 1762 { 1763 struct f2fs_sb_info *sbi = data; 1764 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1765 wait_queue_head_t *q = &dcc->discard_wait_queue; 1766 struct discard_policy dpolicy; 1767 unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; 1768 int issued; 1769 1770 set_freezable(); 1771 1772 do { 1773 if (sbi->gc_mode == GC_URGENT_HIGH || 1774 !f2fs_available_free_memory(sbi, DISCARD_CACHE)) 1775 __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); 1776 else 
			__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
						dcc->discard_granularity);

		if (!atomic_read(&dcc->discard_cmd_cnt))
			wait_ms = dpolicy.max_interval;

		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));

		if (dcc->discard_wake)
			dcc->discard_wake = 0;

		/* clean up pending candidates before going to sleep */
		if (atomic_read(&dcc->queued_discard))
			__wait_all_discard_cmd(sbi, NULL);

		if (try_to_freeze())
			continue;
		if (f2fs_readonly(sbi->sb))
			continue;
		if (kthread_should_stop())
			return 0;
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}
		if (!atomic_read(&dcc->discard_cmd_cnt))
			continue;

		sb_start_intwrite(sbi->sb);

		issued = __issue_discard_cmd(sbi, &dpolicy);
		if (issued > 0) {
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
		} else if (issued == -1) {
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
				wait_ms = dpolicy.mid_interval;
		} else {
			wait_ms = dpolicy.max_interval;
		}

		sb_end_intwrite(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	sector_t sector, nr_sects;
	block_t lblkstart = blkstart;
	int devi = 0;

	if (f2fs_is_multi_device(sbi)) {
		devi = f2fs_target_device_index(sbi, blkstart);
		if (blkstart < FDEV(devi).start_blk ||
		    blkstart > FDEV(devi).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkstart);
			return -EIO;
		}
		blkstart -= FDEV(devi).start_blk;
	}

	/* For sequential zones, reset the zone write pointer */
	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
				 devi, sbi->s_ndevs ? FDEV(devi).path : "",
FDEV(devi).path : "", 1855 blkstart, blklen); 1856 return -EIO; 1857 } 1858 trace_f2fs_issue_reset_zone(bdev, blkstart); 1859 return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, 1860 sector, nr_sects, GFP_NOFS); 1861 } 1862 1863 /* For conventional zones, use regular discard if supported */ 1864 return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); 1865 } 1866 #endif 1867 1868 static int __issue_discard_async(struct f2fs_sb_info *sbi, 1869 struct block_device *bdev, block_t blkstart, block_t blklen) 1870 { 1871 #ifdef CONFIG_BLK_DEV_ZONED 1872 if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) 1873 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); 1874 #endif 1875 return __queue_discard_cmd(sbi, bdev, blkstart, blklen); 1876 } 1877 1878 static int f2fs_issue_discard(struct f2fs_sb_info *sbi, 1879 block_t blkstart, block_t blklen) 1880 { 1881 sector_t start = blkstart, len = 0; 1882 struct block_device *bdev; 1883 struct seg_entry *se; 1884 unsigned int offset; 1885 block_t i; 1886 int err = 0; 1887 1888 bdev = f2fs_target_device(sbi, blkstart, NULL); 1889 1890 for (i = blkstart; i < blkstart + blklen; i++, len++) { 1891 if (i != start) { 1892 struct block_device *bdev2 = 1893 f2fs_target_device(sbi, i, NULL); 1894 1895 if (bdev2 != bdev) { 1896 err = __issue_discard_async(sbi, bdev, 1897 start, len); 1898 if (err) 1899 return err; 1900 bdev = bdev2; 1901 start = i; 1902 len = 0; 1903 } 1904 } 1905 1906 se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); 1907 offset = GET_BLKOFF_FROM_SEG0(sbi, i); 1908 1909 if (f2fs_block_unit_discard(sbi) && 1910 !f2fs_test_and_set_bit(offset, se->discard_map)) 1911 sbi->discard_blks--; 1912 } 1913 1914 if (len) 1915 err = __issue_discard_async(sbi, bdev, start, len); 1916 return err; 1917 } 1918 1919 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, 1920 bool check_only) 1921 { 1922 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 1923 int max_blocks = sbi->blocks_per_seg; 1924 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); 1925 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 1926 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 1927 unsigned long *discard_map = (unsigned long *)se->discard_map; 1928 unsigned long *dmap = SIT_I(sbi)->tmp_map; 1929 unsigned int start = 0, end = -1; 1930 bool force = (cpc->reason & CP_DISCARD); 1931 struct discard_entry *de = NULL; 1932 struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; 1933 int i; 1934 1935 if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) || 1936 !f2fs_block_unit_discard(sbi)) 1937 return false; 1938 1939 if (!force) { 1940 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks || 1941 SM_I(sbi)->dcc_info->nr_discards >= 1942 SM_I(sbi)->dcc_info->max_discards) 1943 return false; 1944 } 1945 1946 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ 1947 for (i = 0; i < entries; i++) 1948 dmap[i] = force ? 
~ckpt_map[i] & ~discard_map[i] : 1949 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; 1950 1951 while (force || SM_I(sbi)->dcc_info->nr_discards <= 1952 SM_I(sbi)->dcc_info->max_discards) { 1953 start = __find_rev_next_bit(dmap, max_blocks, end + 1); 1954 if (start >= max_blocks) 1955 break; 1956 1957 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); 1958 if (force && start && end != max_blocks 1959 && (end - start) < cpc->trim_minlen) 1960 continue; 1961 1962 if (check_only) 1963 return true; 1964 1965 if (!de) { 1966 de = f2fs_kmem_cache_alloc(discard_entry_slab, 1967 GFP_F2FS_ZERO, true, NULL); 1968 de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); 1969 list_add_tail(&de->list, head); 1970 } 1971 1972 for (i = start; i < end; i++) 1973 __set_bit_le(i, (void *)de->discard_map); 1974 1975 SM_I(sbi)->dcc_info->nr_discards += end - start; 1976 } 1977 return false; 1978 } 1979 1980 static void release_discard_addr(struct discard_entry *entry) 1981 { 1982 list_del(&entry->list); 1983 kmem_cache_free(discard_entry_slab, entry); 1984 } 1985 1986 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi) 1987 { 1988 struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); 1989 struct discard_entry *entry, *this; 1990 1991 /* drop caches */ 1992 list_for_each_entry_safe(entry, this, head, list) 1993 release_discard_addr(entry); 1994 } 1995 1996 /* 1997 * Should call f2fs_clear_prefree_segments after checkpoint is done. 1998 */ 1999 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) 2000 { 2001 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2002 unsigned int segno; 2003 2004 mutex_lock(&dirty_i->seglist_lock); 2005 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) 2006 __set_test_and_free(sbi, segno, false); 2007 mutex_unlock(&dirty_i->seglist_lock); 2008 } 2009 2010 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, 2011 struct cp_control *cpc) 2012 { 2013 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 2014 struct list_head *head = &dcc->entry_list; 2015 struct discard_entry *entry, *this; 2016 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2017 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 2018 unsigned int start = 0, end = -1; 2019 unsigned int secno, start_segno; 2020 bool force = (cpc->reason & CP_DISCARD); 2021 bool section_alignment = F2FS_OPTION(sbi).discard_unit == 2022 DISCARD_UNIT_SECTION; 2023 2024 if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) 2025 section_alignment = true; 2026 2027 mutex_lock(&dirty_i->seglist_lock); 2028 2029 while (1) { 2030 int i; 2031 2032 if (section_alignment && end != -1) 2033 end--; 2034 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); 2035 if (start >= MAIN_SEGS(sbi)) 2036 break; 2037 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), 2038 start + 1); 2039 2040 if (section_alignment) { 2041 start = rounddown(start, sbi->segs_per_sec); 2042 end = roundup(end, sbi->segs_per_sec); 2043 } 2044 2045 for (i = start; i < end; i++) { 2046 if (test_and_clear_bit(i, prefree_map)) 2047 dirty_i->nr_dirty[PRE]--; 2048 } 2049 2050 if (!f2fs_realtime_discard_enable(sbi)) 2051 continue; 2052 2053 if (force && start >= cpc->trim_start && 2054 (end - 1) <= cpc->trim_end) 2055 continue; 2056 2057 if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) { 2058 f2fs_issue_discard(sbi, START_BLOCK(sbi, start), 2059 (end - start) << sbi->log_blocks_per_seg); 2060 continue; 2061 } 2062 next: 2063 secno = GET_SEC_FROM_SEG(sbi, start); 2064 start_segno = GET_SEG_FROM_SEC(sbi, secno); 
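		/*
		 * LFS mode with large sections discards whole sections here:
		 * skip a section if it backs a current segment or still has
		 * valid blocks.
		 */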
2065 if (!IS_CURSEC(sbi, secno) && 2066 !get_valid_blocks(sbi, start, true)) 2067 f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), 2068 sbi->segs_per_sec << sbi->log_blocks_per_seg); 2069 2070 start = start_segno + sbi->segs_per_sec; 2071 if (start < end) 2072 goto next; 2073 else 2074 end = start - 1; 2075 } 2076 mutex_unlock(&dirty_i->seglist_lock); 2077 2078 if (!f2fs_block_unit_discard(sbi)) 2079 goto wakeup; 2080 2081 /* send small discards */ 2082 list_for_each_entry_safe(entry, this, head, list) { 2083 unsigned int cur_pos = 0, next_pos, len, total_len = 0; 2084 bool is_valid = test_bit_le(0, entry->discard_map); 2085 2086 find_next: 2087 if (is_valid) { 2088 next_pos = find_next_zero_bit_le(entry->discard_map, 2089 sbi->blocks_per_seg, cur_pos); 2090 len = next_pos - cur_pos; 2091 2092 if (f2fs_sb_has_blkzoned(sbi) || 2093 (force && len < cpc->trim_minlen)) 2094 goto skip; 2095 2096 f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, 2097 len); 2098 total_len += len; 2099 } else { 2100 next_pos = find_next_bit_le(entry->discard_map, 2101 sbi->blocks_per_seg, cur_pos); 2102 } 2103 skip: 2104 cur_pos = next_pos; 2105 is_valid = !is_valid; 2106 2107 if (cur_pos < sbi->blocks_per_seg) 2108 goto find_next; 2109 2110 release_discard_addr(entry); 2111 dcc->nr_discards -= total_len; 2112 } 2113 2114 wakeup: 2115 wake_up_discard_thread(sbi, false); 2116 } 2117 2118 int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) 2119 { 2120 dev_t dev = sbi->sb->s_bdev->bd_dev; 2121 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 2122 int err = 0; 2123 2124 if (!f2fs_realtime_discard_enable(sbi)) 2125 return 0; 2126 2127 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, 2128 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); 2129 if (IS_ERR(dcc->f2fs_issue_discard)) 2130 err = PTR_ERR(dcc->f2fs_issue_discard); 2131 2132 return err; 2133 } 2134 2135 static int create_discard_cmd_control(struct f2fs_sb_info *sbi) 2136 { 2137 struct discard_cmd_control *dcc; 2138 int err = 0, i; 2139 2140 if (SM_I(sbi)->dcc_info) { 2141 dcc = SM_I(sbi)->dcc_info; 2142 goto init_thread; 2143 } 2144 2145 dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL); 2146 if (!dcc) 2147 return -ENOMEM; 2148 2149 dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; 2150 if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) 2151 dcc->discard_granularity = sbi->blocks_per_seg; 2152 else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) 2153 dcc->discard_granularity = BLKS_PER_SEC(sbi); 2154 2155 INIT_LIST_HEAD(&dcc->entry_list); 2156 for (i = 0; i < MAX_PLIST_NUM; i++) 2157 INIT_LIST_HEAD(&dcc->pend_list[i]); 2158 INIT_LIST_HEAD(&dcc->wait_list); 2159 INIT_LIST_HEAD(&dcc->fstrim_list); 2160 mutex_init(&dcc->cmd_lock); 2161 atomic_set(&dcc->issued_discard, 0); 2162 atomic_set(&dcc->queued_discard, 0); 2163 atomic_set(&dcc->discard_cmd_cnt, 0); 2164 dcc->nr_discards = 0; 2165 dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; 2166 dcc->undiscard_blks = 0; 2167 dcc->next_pos = 0; 2168 dcc->root = RB_ROOT_CACHED; 2169 dcc->rbtree_check = false; 2170 2171 init_waitqueue_head(&dcc->discard_wait_queue); 2172 SM_I(sbi)->dcc_info = dcc; 2173 init_thread: 2174 err = f2fs_start_discard_thread(sbi); 2175 if (err) { 2176 kfree(dcc); 2177 SM_I(sbi)->dcc_info = NULL; 2178 } 2179 2180 return err; 2181 } 2182 2183 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) 2184 { 2185 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 2186 2187 if (!dcc) 2188 return; 
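/*
 * Illustrative sketch only: how create_discard_cmd_control() above derives
 * the discard granularity from the discard_unit mount option.  This is a
 * self-contained userspace model; the enum, the demo geometry and the
 * block-unit fallback value are assumptions made for illustration, see the
 * f2fs headers for the real constants.
 */
#if 0	/* standalone illustration, not kernel code */
#include <stdio.h>

enum discard_unit { UNIT_BLOCK, UNIT_SEGMENT, UNIT_SECTION };

static unsigned int discard_granularity(enum discard_unit unit,
					unsigned int blocks_per_seg,
					unsigned int segs_per_sec)
{
	/* block-unit discard: small default granularity (placeholder value) */
	unsigned int granularity = 16;

	if (unit == UNIT_SEGMENT)
		granularity = blocks_per_seg;		/* whole segments */
	else if (unit == UNIT_SECTION)
		granularity = blocks_per_seg * segs_per_sec; /* whole sections */
	return granularity;
}

int main(void)
{
	/* demo geometry: 512 blocks per segment, 4 segments per section */
	printf("segment unit -> %u blocks\n",
	       discard_granularity(UNIT_SEGMENT, 512, 4));
	printf("section unit -> %u blocks\n",
	       discard_granularity(UNIT_SECTION, 512, 4));
	return 0;
}
#endif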
2189 2190 f2fs_stop_discard_thread(sbi); 2191 2192 /* 2193 * Recovery can cache discard commands, so in error path of 2194 * fill_super(), it needs to give a chance to handle them. 2195 */ 2196 if (unlikely(atomic_read(&dcc->discard_cmd_cnt))) 2197 f2fs_issue_discard_timeout(sbi); 2198 2199 kfree(dcc); 2200 SM_I(sbi)->dcc_info = NULL; 2201 } 2202 2203 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 2204 { 2205 struct sit_info *sit_i = SIT_I(sbi); 2206 2207 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { 2208 sit_i->dirty_sentries++; 2209 return false; 2210 } 2211 2212 return true; 2213 } 2214 2215 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, 2216 unsigned int segno, int modified) 2217 { 2218 struct seg_entry *se = get_seg_entry(sbi, segno); 2219 2220 se->type = type; 2221 if (modified) 2222 __mark_sit_entry_dirty(sbi, segno); 2223 } 2224 2225 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi, 2226 block_t blkaddr) 2227 { 2228 unsigned int segno = GET_SEGNO(sbi, blkaddr); 2229 2230 if (segno == NULL_SEGNO) 2231 return 0; 2232 return get_seg_entry(sbi, segno)->mtime; 2233 } 2234 2235 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, 2236 unsigned long long old_mtime) 2237 { 2238 struct seg_entry *se; 2239 unsigned int segno = GET_SEGNO(sbi, blkaddr); 2240 unsigned long long ctime = get_mtime(sbi, false); 2241 unsigned long long mtime = old_mtime ? old_mtime : ctime; 2242 2243 if (segno == NULL_SEGNO) 2244 return; 2245 2246 se = get_seg_entry(sbi, segno); 2247 2248 if (!se->mtime) 2249 se->mtime = mtime; 2250 else 2251 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime, 2252 se->valid_blocks + 1); 2253 2254 if (ctime > SIT_I(sbi)->max_mtime) 2255 SIT_I(sbi)->max_mtime = ctime; 2256 } 2257 2258 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) 2259 { 2260 struct seg_entry *se; 2261 unsigned int segno, offset; 2262 long int new_vblocks; 2263 bool exist; 2264 #ifdef CONFIG_F2FS_CHECK_FS 2265 bool mir_exist; 2266 #endif 2267 2268 segno = GET_SEGNO(sbi, blkaddr); 2269 2270 se = get_seg_entry(sbi, segno); 2271 new_vblocks = se->valid_blocks + del; 2272 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 2273 2274 f2fs_bug_on(sbi, (new_vblocks < 0 || 2275 (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); 2276 2277 se->valid_blocks = new_vblocks; 2278 2279 /* Update valid block bitmap */ 2280 if (del > 0) { 2281 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); 2282 #ifdef CONFIG_F2FS_CHECK_FS 2283 mir_exist = f2fs_test_and_set_bit(offset, 2284 se->cur_valid_map_mir); 2285 if (unlikely(exist != mir_exist)) { 2286 f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", 2287 blkaddr, exist); 2288 f2fs_bug_on(sbi, 1); 2289 } 2290 #endif 2291 if (unlikely(exist)) { 2292 f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", 2293 blkaddr); 2294 f2fs_bug_on(sbi, 1); 2295 se->valid_blocks--; 2296 del = 0; 2297 } 2298 2299 if (f2fs_block_unit_discard(sbi) && 2300 !f2fs_test_and_set_bit(offset, se->discard_map)) 2301 sbi->discard_blks--; 2302 2303 /* 2304 * SSR should never reuse block which is checkpointed 2305 * or newly invalidated. 
2306 */ 2307 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { 2308 if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) 2309 se->ckpt_valid_blocks++; 2310 } 2311 } else { 2312 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); 2313 #ifdef CONFIG_F2FS_CHECK_FS 2314 mir_exist = f2fs_test_and_clear_bit(offset, 2315 se->cur_valid_map_mir); 2316 if (unlikely(exist != mir_exist)) { 2317 f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", 2318 blkaddr, exist); 2319 f2fs_bug_on(sbi, 1); 2320 } 2321 #endif 2322 if (unlikely(!exist)) { 2323 f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", 2324 blkaddr); 2325 f2fs_bug_on(sbi, 1); 2326 se->valid_blocks++; 2327 del = 0; 2328 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2329 /* 2330 * If checkpoints are off, we must not reuse data that 2331 * was used in the previous checkpoint. If it was used 2332 * before, we must track that to know how much space we 2333 * really have. 2334 */ 2335 if (f2fs_test_bit(offset, se->ckpt_valid_map)) { 2336 spin_lock(&sbi->stat_lock); 2337 sbi->unusable_block_count++; 2338 spin_unlock(&sbi->stat_lock); 2339 } 2340 } 2341 2342 if (f2fs_block_unit_discard(sbi) && 2343 f2fs_test_and_clear_bit(offset, se->discard_map)) 2344 sbi->discard_blks++; 2345 } 2346 if (!f2fs_test_bit(offset, se->ckpt_valid_map)) 2347 se->ckpt_valid_blocks += del; 2348 2349 __mark_sit_entry_dirty(sbi, segno); 2350 2351 /* update total number of valid blocks to be written in ckpt area */ 2352 SIT_I(sbi)->written_valid_blocks += del; 2353 2354 if (__is_large_section(sbi)) 2355 get_sec_entry(sbi, segno)->valid_blocks += del; 2356 } 2357 2358 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) 2359 { 2360 unsigned int segno = GET_SEGNO(sbi, addr); 2361 struct sit_info *sit_i = SIT_I(sbi); 2362 2363 f2fs_bug_on(sbi, addr == NULL_ADDR); 2364 if (addr == NEW_ADDR || addr == COMPRESS_ADDR) 2365 return; 2366 2367 invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); 2368 f2fs_invalidate_compress_page(sbi, addr); 2369 2370 /* add it into sit main buffer */ 2371 down_write(&sit_i->sentry_lock); 2372 2373 update_segment_mtime(sbi, addr, 0); 2374 update_sit_entry(sbi, addr, -1); 2375 2376 /* add it into dirty seglist */ 2377 locate_dirty_segment(sbi, segno); 2378 2379 up_write(&sit_i->sentry_lock); 2380 } 2381 2382 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) 2383 { 2384 struct sit_info *sit_i = SIT_I(sbi); 2385 unsigned int segno, offset; 2386 struct seg_entry *se; 2387 bool is_cp = false; 2388 2389 if (!__is_valid_data_blkaddr(blkaddr)) 2390 return true; 2391 2392 down_read(&sit_i->sentry_lock); 2393 2394 segno = GET_SEGNO(sbi, blkaddr); 2395 se = get_seg_entry(sbi, segno); 2396 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 2397 2398 if (f2fs_test_bit(offset, se->ckpt_valid_map)) 2399 is_cp = true; 2400 2401 up_read(&sit_i->sentry_lock); 2402 2403 return is_cp; 2404 } 2405 2406 /* 2407 * This function should be resided under the curseg_mutex lock 2408 */ 2409 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, 2410 struct f2fs_summary *sum) 2411 { 2412 struct curseg_info *curseg = CURSEG_I(sbi, type); 2413 void *addr = curseg->sum_blk; 2414 2415 addr += curseg->next_blkoff * sizeof(struct f2fs_summary); 2416 memcpy(addr, sum, sizeof(struct f2fs_summary)); 2417 } 2418 2419 /* 2420 * Calculate the number of current summary pages for writing 2421 */ 2422 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) 2423 { 2424 int 
valid_sum_count = 0; 2425 int i, sum_in_page; 2426 2427 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 2428 if (sbi->ckpt->alloc_type[i] == SSR) 2429 valid_sum_count += sbi->blocks_per_seg; 2430 else { 2431 if (for_ra) 2432 valid_sum_count += le16_to_cpu( 2433 F2FS_CKPT(sbi)->cur_data_blkoff[i]); 2434 else 2435 valid_sum_count += curseg_blkoff(sbi, i); 2436 } 2437 } 2438 2439 sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE - 2440 SUM_FOOTER_SIZE) / SUMMARY_SIZE; 2441 if (valid_sum_count <= sum_in_page) 2442 return 1; 2443 else if ((valid_sum_count - sum_in_page) <= 2444 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) 2445 return 2; 2446 return 3; 2447 } 2448 2449 /* 2450 * Caller should put this summary page 2451 */ 2452 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) 2453 { 2454 if (unlikely(f2fs_cp_error(sbi))) 2455 return ERR_PTR(-EIO); 2456 return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno)); 2457 } 2458 2459 void f2fs_update_meta_page(struct f2fs_sb_info *sbi, 2460 void *src, block_t blk_addr) 2461 { 2462 struct page *page = f2fs_grab_meta_page(sbi, blk_addr); 2463 2464 memcpy(page_address(page), src, PAGE_SIZE); 2465 set_page_dirty(page); 2466 f2fs_put_page(page, 1); 2467 } 2468 2469 static void write_sum_page(struct f2fs_sb_info *sbi, 2470 struct f2fs_summary_block *sum_blk, block_t blk_addr) 2471 { 2472 f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr); 2473 } 2474 2475 static void write_current_sum_page(struct f2fs_sb_info *sbi, 2476 int type, block_t blk_addr) 2477 { 2478 struct curseg_info *curseg = CURSEG_I(sbi, type); 2479 struct page *page = f2fs_grab_meta_page(sbi, blk_addr); 2480 struct f2fs_summary_block *src = curseg->sum_blk; 2481 struct f2fs_summary_block *dst; 2482 2483 dst = (struct f2fs_summary_block *)page_address(page); 2484 memset(dst, 0, PAGE_SIZE); 2485 2486 mutex_lock(&curseg->curseg_mutex); 2487 2488 down_read(&curseg->journal_rwsem); 2489 memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE); 2490 up_read(&curseg->journal_rwsem); 2491 2492 memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE); 2493 memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE); 2494 2495 mutex_unlock(&curseg->curseg_mutex); 2496 2497 set_page_dirty(page); 2498 f2fs_put_page(page, 1); 2499 } 2500 2501 static int is_next_segment_free(struct f2fs_sb_info *sbi, 2502 struct curseg_info *curseg, int type) 2503 { 2504 unsigned int segno = curseg->segno + 1; 2505 struct free_segmap_info *free_i = FREE_I(sbi); 2506 2507 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) 2508 return !test_bit(segno, free_i->free_segmap); 2509 return 0; 2510 } 2511 2512 /* 2513 * Find a new segment from the free segments bitmap to right order 2514 * This function should be returned with success, otherwise BUG 2515 */ 2516 static void get_new_segment(struct f2fs_sb_info *sbi, 2517 unsigned int *newseg, bool new_sec, int dir) 2518 { 2519 struct free_segmap_info *free_i = FREE_I(sbi); 2520 unsigned int segno, secno, zoneno; 2521 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; 2522 unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); 2523 unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); 2524 unsigned int left_start = hint; 2525 bool init = true; 2526 int go_left = 0; 2527 int i; 2528 2529 spin_lock(&free_i->segmap_lock); 2530 2531 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { 2532 segno = find_next_zero_bit(free_i->free_segmap, 2533 GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); 2534 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) 
2535 goto got_it; 2536 } 2537 find_other_zone: 2538 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); 2539 if (secno >= MAIN_SECS(sbi)) { 2540 if (dir == ALLOC_RIGHT) { 2541 secno = find_next_zero_bit(free_i->free_secmap, 2542 MAIN_SECS(sbi), 0); 2543 f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); 2544 } else { 2545 go_left = 1; 2546 left_start = hint - 1; 2547 } 2548 } 2549 if (go_left == 0) 2550 goto skip_left; 2551 2552 while (test_bit(left_start, free_i->free_secmap)) { 2553 if (left_start > 0) { 2554 left_start--; 2555 continue; 2556 } 2557 left_start = find_next_zero_bit(free_i->free_secmap, 2558 MAIN_SECS(sbi), 0); 2559 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); 2560 break; 2561 } 2562 secno = left_start; 2563 skip_left: 2564 segno = GET_SEG_FROM_SEC(sbi, secno); 2565 zoneno = GET_ZONE_FROM_SEC(sbi, secno); 2566 2567 /* give up on finding another zone */ 2568 if (!init) 2569 goto got_it; 2570 if (sbi->secs_per_zone == 1) 2571 goto got_it; 2572 if (zoneno == old_zoneno) 2573 goto got_it; 2574 if (dir == ALLOC_LEFT) { 2575 if (!go_left && zoneno + 1 >= total_zones) 2576 goto got_it; 2577 if (go_left && zoneno == 0) 2578 goto got_it; 2579 } 2580 for (i = 0; i < NR_CURSEG_TYPE; i++) 2581 if (CURSEG_I(sbi, i)->zone == zoneno) 2582 break; 2583 2584 if (i < NR_CURSEG_TYPE) { 2585 /* zone is in user, try another */ 2586 if (go_left) 2587 hint = zoneno * sbi->secs_per_zone - 1; 2588 else if (zoneno + 1 >= total_zones) 2589 hint = 0; 2590 else 2591 hint = (zoneno + 1) * sbi->secs_per_zone; 2592 init = false; 2593 goto find_other_zone; 2594 } 2595 got_it: 2596 /* set it as dirty segment in free segmap */ 2597 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); 2598 __set_inuse(sbi, segno); 2599 *newseg = segno; 2600 spin_unlock(&free_i->segmap_lock); 2601 } 2602 2603 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) 2604 { 2605 struct curseg_info *curseg = CURSEG_I(sbi, type); 2606 struct summary_footer *sum_footer; 2607 unsigned short seg_type = curseg->seg_type; 2608 2609 curseg->inited = true; 2610 curseg->segno = curseg->next_segno; 2611 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); 2612 curseg->next_blkoff = 0; 2613 curseg->next_segno = NULL_SEGNO; 2614 2615 sum_footer = &(curseg->sum_blk->footer); 2616 memset(sum_footer, 0, sizeof(struct summary_footer)); 2617 2618 sanity_check_seg_type(sbi, seg_type); 2619 2620 if (IS_DATASEG(seg_type)) 2621 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA); 2622 if (IS_NODESEG(seg_type)) 2623 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); 2624 __set_sit_entry_type(sbi, seg_type, curseg->segno, modified); 2625 } 2626 2627 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) 2628 { 2629 struct curseg_info *curseg = CURSEG_I(sbi, type); 2630 unsigned short seg_type = curseg->seg_type; 2631 2632 sanity_check_seg_type(sbi, seg_type); 2633 2634 /* if segs_per_sec is large than 1, we need to keep original policy. 
*/ 2635 if (__is_large_section(sbi)) 2636 return curseg->segno; 2637 2638 /* inmem log may not locate on any segment after mount */ 2639 if (!curseg->inited) 2640 return 0; 2641 2642 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 2643 return 0; 2644 2645 if (test_opt(sbi, NOHEAP) && 2646 (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))) 2647 return 0; 2648 2649 if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) 2650 return SIT_I(sbi)->last_victim[ALLOC_NEXT]; 2651 2652 /* find segments from 0 to reuse freed segments */ 2653 if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) 2654 return 0; 2655 2656 return curseg->segno; 2657 } 2658 2659 /* 2660 * Allocate a current working segment. 2661 * This function always allocates a free segment in LFS manner. 2662 */ 2663 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) 2664 { 2665 struct curseg_info *curseg = CURSEG_I(sbi, type); 2666 unsigned short seg_type = curseg->seg_type; 2667 unsigned int segno = curseg->segno; 2668 int dir = ALLOC_LEFT; 2669 2670 if (curseg->inited) 2671 write_sum_page(sbi, curseg->sum_blk, 2672 GET_SUM_BLOCK(sbi, segno)); 2673 if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA) 2674 dir = ALLOC_RIGHT; 2675 2676 if (test_opt(sbi, NOHEAP)) 2677 dir = ALLOC_RIGHT; 2678 2679 segno = __get_next_segno(sbi, type); 2680 get_new_segment(sbi, &segno, new_sec, dir); 2681 curseg->next_segno = segno; 2682 reset_curseg(sbi, type, 1); 2683 curseg->alloc_type = LFS; 2684 } 2685 2686 static int __next_free_blkoff(struct f2fs_sb_info *sbi, 2687 int segno, block_t start) 2688 { 2689 struct seg_entry *se = get_seg_entry(sbi, segno); 2690 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 2691 unsigned long *target_map = SIT_I(sbi)->tmp_map; 2692 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 2693 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 2694 int i; 2695 2696 for (i = 0; i < entries; i++) 2697 target_map[i] = ckpt_map[i] | cur_map[i]; 2698 2699 return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); 2700 } 2701 2702 /* 2703 * If a segment is written by LFS manner, next block offset is just obtained 2704 * by increasing the current block offset. 
However, if a segment is written by 2705 * SSR manner, next block offset obtained by calling __next_free_blkoff 2706 */ 2707 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, 2708 struct curseg_info *seg) 2709 { 2710 if (seg->alloc_type == SSR) 2711 seg->next_blkoff = 2712 __next_free_blkoff(sbi, seg->segno, 2713 seg->next_blkoff + 1); 2714 else 2715 seg->next_blkoff++; 2716 } 2717 2718 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) 2719 { 2720 return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg; 2721 } 2722 2723 /* 2724 * This function always allocates a used segment(from dirty seglist) by SSR 2725 * manner, so it should recover the existing segment information of valid blocks 2726 */ 2727 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush) 2728 { 2729 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2730 struct curseg_info *curseg = CURSEG_I(sbi, type); 2731 unsigned int new_segno = curseg->next_segno; 2732 struct f2fs_summary_block *sum_node; 2733 struct page *sum_page; 2734 2735 if (flush) 2736 write_sum_page(sbi, curseg->sum_blk, 2737 GET_SUM_BLOCK(sbi, curseg->segno)); 2738 2739 __set_test_and_inuse(sbi, new_segno); 2740 2741 mutex_lock(&dirty_i->seglist_lock); 2742 __remove_dirty_segment(sbi, new_segno, PRE); 2743 __remove_dirty_segment(sbi, new_segno, DIRTY); 2744 mutex_unlock(&dirty_i->seglist_lock); 2745 2746 reset_curseg(sbi, type, 1); 2747 curseg->alloc_type = SSR; 2748 curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0); 2749 2750 sum_page = f2fs_get_sum_page(sbi, new_segno); 2751 if (IS_ERR(sum_page)) { 2752 /* GC won't be able to use stale summary pages by cp_error */ 2753 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); 2754 return; 2755 } 2756 sum_node = (struct f2fs_summary_block *)page_address(sum_page); 2757 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); 2758 f2fs_put_page(sum_page, 1); 2759 } 2760 2761 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, 2762 int alloc_mode, unsigned long long age); 2763 2764 static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, 2765 int target_type, int alloc_mode, 2766 unsigned long long age) 2767 { 2768 struct curseg_info *curseg = CURSEG_I(sbi, type); 2769 2770 curseg->seg_type = target_type; 2771 2772 if (get_ssr_segment(sbi, type, alloc_mode, age)) { 2773 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); 2774 2775 curseg->seg_type = se->type; 2776 change_curseg(sbi, type, true); 2777 } else { 2778 /* allocate cold segment by default */ 2779 curseg->seg_type = CURSEG_COLD_DATA; 2780 new_curseg(sbi, type, true); 2781 } 2782 stat_inc_seg_type(sbi, curseg); 2783 } 2784 2785 static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) 2786 { 2787 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); 2788 2789 if (!sbi->am.atgc_enabled) 2790 return; 2791 2792 down_read(&SM_I(sbi)->curseg_lock); 2793 2794 mutex_lock(&curseg->curseg_mutex); 2795 down_write(&SIT_I(sbi)->sentry_lock); 2796 2797 get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0); 2798 2799 up_write(&SIT_I(sbi)->sentry_lock); 2800 mutex_unlock(&curseg->curseg_mutex); 2801 2802 up_read(&SM_I(sbi)->curseg_lock); 2803 2804 } 2805 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) 2806 { 2807 __f2fs_init_atgc_curseg(sbi); 2808 } 2809 2810 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) 2811 { 2812 struct curseg_info *curseg = CURSEG_I(sbi, type); 2813 2814 mutex_lock(&curseg->curseg_mutex); 2815 if 
(!curseg->inited) 2816 goto out; 2817 2818 if (get_valid_blocks(sbi, curseg->segno, false)) { 2819 write_sum_page(sbi, curseg->sum_blk, 2820 GET_SUM_BLOCK(sbi, curseg->segno)); 2821 } else { 2822 mutex_lock(&DIRTY_I(sbi)->seglist_lock); 2823 __set_test_and_free(sbi, curseg->segno, true); 2824 mutex_unlock(&DIRTY_I(sbi)->seglist_lock); 2825 } 2826 out: 2827 mutex_unlock(&curseg->curseg_mutex); 2828 } 2829 2830 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi) 2831 { 2832 __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED); 2833 2834 if (sbi->am.atgc_enabled) 2835 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC); 2836 } 2837 2838 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type) 2839 { 2840 struct curseg_info *curseg = CURSEG_I(sbi, type); 2841 2842 mutex_lock(&curseg->curseg_mutex); 2843 if (!curseg->inited) 2844 goto out; 2845 if (get_valid_blocks(sbi, curseg->segno, false)) 2846 goto out; 2847 2848 mutex_lock(&DIRTY_I(sbi)->seglist_lock); 2849 __set_test_and_inuse(sbi, curseg->segno); 2850 mutex_unlock(&DIRTY_I(sbi)->seglist_lock); 2851 out: 2852 mutex_unlock(&curseg->curseg_mutex); 2853 } 2854 2855 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi) 2856 { 2857 __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED); 2858 2859 if (sbi->am.atgc_enabled) 2860 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC); 2861 } 2862 2863 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, 2864 int alloc_mode, unsigned long long age) 2865 { 2866 struct curseg_info *curseg = CURSEG_I(sbi, type); 2867 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; 2868 unsigned segno = NULL_SEGNO; 2869 unsigned short seg_type = curseg->seg_type; 2870 int i, cnt; 2871 bool reversed = false; 2872 2873 sanity_check_seg_type(sbi, seg_type); 2874 2875 /* f2fs_need_SSR() already forces to do this */ 2876 if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { 2877 curseg->next_segno = segno; 2878 return 1; 2879 } 2880 2881 /* For node segments, let's do SSR more intensively */ 2882 if (IS_NODESEG(seg_type)) { 2883 if (seg_type >= CURSEG_WARM_NODE) { 2884 reversed = true; 2885 i = CURSEG_COLD_NODE; 2886 } else { 2887 i = CURSEG_HOT_NODE; 2888 } 2889 cnt = NR_CURSEG_NODE_TYPE; 2890 } else { 2891 if (seg_type >= CURSEG_WARM_DATA) { 2892 reversed = true; 2893 i = CURSEG_COLD_DATA; 2894 } else { 2895 i = CURSEG_HOT_DATA; 2896 } 2897 cnt = NR_CURSEG_DATA_TYPE; 2898 } 2899 2900 for (; cnt-- > 0; reversed ? 
i-- : i++) { 2901 if (i == seg_type) 2902 continue; 2903 if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { 2904 curseg->next_segno = segno; 2905 return 1; 2906 } 2907 } 2908 2909 /* find valid_blocks=0 in dirty list */ 2910 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 2911 segno = get_free_segment(sbi); 2912 if (segno != NULL_SEGNO) { 2913 curseg->next_segno = segno; 2914 return 1; 2915 } 2916 } 2917 return 0; 2918 } 2919 2920 /* 2921 * flush out current segment and replace it with new segment 2922 * This function should be returned with success, otherwise BUG 2923 */ 2924 static void allocate_segment_by_default(struct f2fs_sb_info *sbi, 2925 int type, bool force) 2926 { 2927 struct curseg_info *curseg = CURSEG_I(sbi, type); 2928 2929 if (force) 2930 new_curseg(sbi, type, true); 2931 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && 2932 curseg->seg_type == CURSEG_WARM_NODE) 2933 new_curseg(sbi, type, false); 2934 else if (curseg->alloc_type == LFS && 2935 is_next_segment_free(sbi, curseg, type) && 2936 likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) 2937 new_curseg(sbi, type, false); 2938 else if (f2fs_need_SSR(sbi) && 2939 get_ssr_segment(sbi, type, SSR, 0)) 2940 change_curseg(sbi, type, true); 2941 else 2942 new_curseg(sbi, type, false); 2943 2944 stat_inc_seg_type(sbi, curseg); 2945 } 2946 2947 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, 2948 unsigned int start, unsigned int end) 2949 { 2950 struct curseg_info *curseg = CURSEG_I(sbi, type); 2951 unsigned int segno; 2952 2953 down_read(&SM_I(sbi)->curseg_lock); 2954 mutex_lock(&curseg->curseg_mutex); 2955 down_write(&SIT_I(sbi)->sentry_lock); 2956 2957 segno = CURSEG_I(sbi, type)->segno; 2958 if (segno < start || segno > end) 2959 goto unlock; 2960 2961 if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) 2962 change_curseg(sbi, type, true); 2963 else 2964 new_curseg(sbi, type, true); 2965 2966 stat_inc_seg_type(sbi, curseg); 2967 2968 locate_dirty_segment(sbi, segno); 2969 unlock: 2970 up_write(&SIT_I(sbi)->sentry_lock); 2971 2972 if (segno != curseg->segno) 2973 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", 2974 type, segno, curseg->segno); 2975 2976 mutex_unlock(&curseg->curseg_mutex); 2977 up_read(&SM_I(sbi)->curseg_lock); 2978 } 2979 2980 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, 2981 bool new_sec, bool force) 2982 { 2983 struct curseg_info *curseg = CURSEG_I(sbi, type); 2984 unsigned int old_segno; 2985 2986 if (!curseg->inited) 2987 goto alloc; 2988 2989 if (force || curseg->next_blkoff || 2990 get_valid_blocks(sbi, curseg->segno, new_sec)) 2991 goto alloc; 2992 2993 if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) 2994 return; 2995 alloc: 2996 old_segno = curseg->segno; 2997 SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); 2998 locate_dirty_segment(sbi, old_segno); 2999 } 3000 3001 static void __allocate_new_section(struct f2fs_sb_info *sbi, 3002 int type, bool force) 3003 { 3004 __allocate_new_segment(sbi, type, true, force); 3005 } 3006 3007 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) 3008 { 3009 down_read(&SM_I(sbi)->curseg_lock); 3010 down_write(&SIT_I(sbi)->sentry_lock); 3011 __allocate_new_section(sbi, type, force); 3012 up_write(&SIT_I(sbi)->sentry_lock); 3013 up_read(&SM_I(sbi)->curseg_lock); 3014 } 3015 3016 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) 3017 { 3018 int i; 3019 3020 down_read(&SM_I(sbi)->curseg_lock); 3021 
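/*
 * Illustrative sketch only: the probe order used by get_ssr_segment() above
 * when the log's own temperature has no SSR victim.  Self-contained userspace
 * model; the enum values mirror the CURSEG_* ordering purely for this demo.
 */
#if 0	/* standalone illustration, not kernel code */
#include <stdio.h>

enum { HOT_DATA, WARM_DATA, COLD_DATA, HOT_NODE, WARM_NODE, COLD_NODE };

/* Fill 'order' with the temperatures probed for seg_type; return the count. */
static int ssr_probe_order(int seg_type, int order[2])
{
	int is_node = seg_type >= HOT_NODE;
	int reversed = is_node ? (seg_type >= WARM_NODE)
			       : (seg_type >= WARM_DATA);
	int i = reversed ? (is_node ? COLD_NODE : COLD_DATA)
			 : (is_node ? HOT_NODE : HOT_DATA);
	int cnt = 3, n = 0;

	for (; cnt-- > 0; reversed ? i-- : i++) {
		if (i == seg_type)
			continue;	/* own log already failed to find one */
		order[n++] = i;
	}
	return n;
}

int main(void)
{
	int order[2], n = ssr_probe_order(WARM_NODE, order);

	/* a warm node log probes COLD_NODE first, then HOT_NODE */
	printf("%d temps probed: %d %d\n", n, order[0], order[1]);
	return 0;
}
#endif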
down_write(&SIT_I(sbi)->sentry_lock); 3022 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) 3023 __allocate_new_segment(sbi, i, false, false); 3024 up_write(&SIT_I(sbi)->sentry_lock); 3025 up_read(&SM_I(sbi)->curseg_lock); 3026 } 3027 3028 static const struct segment_allocation default_salloc_ops = { 3029 .allocate_segment = allocate_segment_by_default, 3030 }; 3031 3032 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, 3033 struct cp_control *cpc) 3034 { 3035 __u64 trim_start = cpc->trim_start; 3036 bool has_candidate = false; 3037 3038 down_write(&SIT_I(sbi)->sentry_lock); 3039 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { 3040 if (add_discard_addrs(sbi, cpc, true)) { 3041 has_candidate = true; 3042 break; 3043 } 3044 } 3045 up_write(&SIT_I(sbi)->sentry_lock); 3046 3047 cpc->trim_start = trim_start; 3048 return has_candidate; 3049 } 3050 3051 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, 3052 struct discard_policy *dpolicy, 3053 unsigned int start, unsigned int end) 3054 { 3055 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 3056 struct discard_cmd *prev_dc = NULL, *next_dc = NULL; 3057 struct rb_node **insert_p = NULL, *insert_parent = NULL; 3058 struct discard_cmd *dc; 3059 struct blk_plug plug; 3060 int issued; 3061 unsigned int trimmed = 0; 3062 3063 next: 3064 issued = 0; 3065 3066 mutex_lock(&dcc->cmd_lock); 3067 if (unlikely(dcc->rbtree_check)) 3068 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, 3069 &dcc->root, false)); 3070 3071 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, 3072 NULL, start, 3073 (struct rb_entry **)&prev_dc, 3074 (struct rb_entry **)&next_dc, 3075 &insert_p, &insert_parent, true, NULL); 3076 if (!dc) 3077 dc = next_dc; 3078 3079 blk_start_plug(&plug); 3080 3081 while (dc && dc->lstart <= end) { 3082 struct rb_node *node; 3083 int err = 0; 3084 3085 if (dc->len < dpolicy->granularity) 3086 goto skip; 3087 3088 if (dc->state != D_PREP) { 3089 list_move_tail(&dc->list, &dcc->fstrim_list); 3090 goto skip; 3091 } 3092 3093 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); 3094 3095 if (issued >= dpolicy->max_requests) { 3096 start = dc->lstart + dc->len; 3097 3098 if (err) 3099 __remove_discard_cmd(sbi, dc); 3100 3101 blk_finish_plug(&plug); 3102 mutex_unlock(&dcc->cmd_lock); 3103 trimmed += __wait_all_discard_cmd(sbi, NULL); 3104 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); 3105 goto next; 3106 } 3107 skip: 3108 node = rb_next(&dc->rb_node); 3109 if (err) 3110 __remove_discard_cmd(sbi, dc); 3111 dc = rb_entry_safe(node, struct discard_cmd, rb_node); 3112 3113 if (fatal_signal_pending(current)) 3114 break; 3115 } 3116 3117 blk_finish_plug(&plug); 3118 mutex_unlock(&dcc->cmd_lock); 3119 3120 return trimmed; 3121 } 3122 3123 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) 3124 { 3125 __u64 start = F2FS_BYTES_TO_BLK(range->start); 3126 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; 3127 unsigned int start_segno, end_segno; 3128 block_t start_block, end_block; 3129 struct cp_control cpc; 3130 struct discard_policy dpolicy; 3131 unsigned long long trimmed = 0; 3132 int err = 0; 3133 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); 3134 3135 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) 3136 return -EINVAL; 3137 3138 if (end < MAIN_BLKADDR(sbi)) 3139 goto out; 3140 3141 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { 3142 f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); 3143 return -EFSCORRUPTED; 3144 } 3145 
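/*
 * Illustrative sketch only: the range arithmetic performed just below, i.e.
 * turning an fstrim byte range into an inclusive, optionally section-aligned
 * segment range.  Userspace model with demo geometry (4 KiB blocks, 512
 * blocks per segment, 4 segments per section); the demo counts segments from
 * block zero and ignores the main-area base that the real GET_SEGNO() uses.
 */
#if 0	/* standalone illustration, not kernel code */
#include <stdio.h>

#define BLKSIZE		4096ULL
#define BLKS_PER_SEG	512ULL
#define SEGS_PER_SEC	4ULL

int main(void)
{
	unsigned long long start_byte = 10ULL << 20;	/* trim from 10 MiB */
	unsigned long long len_byte = 64ULL << 20;	/* ... for 64 MiB   */

	unsigned long long start_blk = start_byte / BLKSIZE;
	unsigned long long end_blk = start_blk + len_byte / BLKSIZE - 1;

	unsigned long long start_segno = start_blk / BLKS_PER_SEG;
	unsigned long long end_segno = end_blk / BLKS_PER_SEG;

	/* need_align: widen to whole sections, as the code below does */
	unsigned long long a_start = start_segno / SEGS_PER_SEC * SEGS_PER_SEC;
	unsigned long long a_end =
		(end_segno + SEGS_PER_SEC) / SEGS_PER_SEC * SEGS_PER_SEC - 1;

	printf("segments %llu..%llu, section-aligned %llu..%llu\n",
	       start_segno, end_segno, a_start, a_end);
	return 0;
}
#endif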
3146 /* start/end segment number in main_area */ 3147 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); 3148 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : 3149 GET_SEGNO(sbi, end); 3150 if (need_align) { 3151 start_segno = rounddown(start_segno, sbi->segs_per_sec); 3152 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; 3153 } 3154 3155 cpc.reason = CP_DISCARD; 3156 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); 3157 cpc.trim_start = start_segno; 3158 cpc.trim_end = end_segno; 3159 3160 if (sbi->discard_blks == 0) 3161 goto out; 3162 3163 down_write(&sbi->gc_lock); 3164 err = f2fs_write_checkpoint(sbi, &cpc); 3165 up_write(&sbi->gc_lock); 3166 if (err) 3167 goto out; 3168 3169 /* 3170 * We filed discard candidates, but actually we don't need to wait for 3171 * all of them, since they'll be issued in idle time along with runtime 3172 * discard option. User configuration looks like using runtime discard 3173 * or periodic fstrim instead of it. 3174 */ 3175 if (f2fs_realtime_discard_enable(sbi)) 3176 goto out; 3177 3178 start_block = START_BLOCK(sbi, start_segno); 3179 end_block = START_BLOCK(sbi, end_segno + 1); 3180 3181 __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); 3182 trimmed = __issue_discard_cmd_range(sbi, &dpolicy, 3183 start_block, end_block); 3184 3185 trimmed += __wait_discard_cmd_range(sbi, &dpolicy, 3186 start_block, end_block); 3187 out: 3188 if (!err) 3189 range->len = F2FS_BLK_TO_BYTES(trimmed); 3190 return err; 3191 } 3192 3193 static bool __has_curseg_space(struct f2fs_sb_info *sbi, 3194 struct curseg_info *curseg) 3195 { 3196 return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi, 3197 curseg->segno); 3198 } 3199 3200 int f2fs_rw_hint_to_seg_type(enum rw_hint hint) 3201 { 3202 switch (hint) { 3203 case WRITE_LIFE_SHORT: 3204 return CURSEG_HOT_DATA; 3205 case WRITE_LIFE_EXTREME: 3206 return CURSEG_COLD_DATA; 3207 default: 3208 return CURSEG_WARM_DATA; 3209 } 3210 } 3211 3212 /* This returns write hints for each segment type. This hints will be 3213 * passed down to block layer. There are mapping tables which depend on 3214 * the mount option 'whint_mode'. 3215 * 3216 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. 3217 * 3218 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. 3219 * 3220 * User F2FS Block 3221 * ---- ---- ----- 3222 * META WRITE_LIFE_NOT_SET 3223 * HOT_NODE " 3224 * WARM_NODE " 3225 * COLD_NODE " 3226 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME 3227 * extension list " " 3228 * 3229 * -- buffered io 3230 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME 3231 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT 3232 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET 3233 * WRITE_LIFE_NONE " " 3234 * WRITE_LIFE_MEDIUM " " 3235 * WRITE_LIFE_LONG " " 3236 * 3237 * -- direct io 3238 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME 3239 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT 3240 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET 3241 * WRITE_LIFE_NONE " WRITE_LIFE_NONE 3242 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM 3243 * WRITE_LIFE_LONG " WRITE_LIFE_LONG 3244 * 3245 * 3) whint_mode=fs-based. F2FS passes down hints with its policy. 
3246 * 3247 * User F2FS Block 3248 * ---- ---- ----- 3249 * META WRITE_LIFE_MEDIUM; 3250 * HOT_NODE WRITE_LIFE_NOT_SET 3251 * WARM_NODE " 3252 * COLD_NODE WRITE_LIFE_NONE 3253 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME 3254 * extension list " " 3255 * 3256 * -- buffered io 3257 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME 3258 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT 3259 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG 3260 * WRITE_LIFE_NONE " " 3261 * WRITE_LIFE_MEDIUM " " 3262 * WRITE_LIFE_LONG " " 3263 * 3264 * -- direct io 3265 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME 3266 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT 3267 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET 3268 * WRITE_LIFE_NONE " WRITE_LIFE_NONE 3269 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM 3270 * WRITE_LIFE_LONG " WRITE_LIFE_LONG 3271 */ 3272 3273 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, 3274 enum page_type type, enum temp_type temp) 3275 { 3276 if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { 3277 if (type == DATA) { 3278 if (temp == WARM) 3279 return WRITE_LIFE_NOT_SET; 3280 else if (temp == HOT) 3281 return WRITE_LIFE_SHORT; 3282 else if (temp == COLD) 3283 return WRITE_LIFE_EXTREME; 3284 } else { 3285 return WRITE_LIFE_NOT_SET; 3286 } 3287 } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { 3288 if (type == DATA) { 3289 if (temp == WARM) 3290 return WRITE_LIFE_LONG; 3291 else if (temp == HOT) 3292 return WRITE_LIFE_SHORT; 3293 else if (temp == COLD) 3294 return WRITE_LIFE_EXTREME; 3295 } else if (type == NODE) { 3296 if (temp == WARM || temp == HOT) 3297 return WRITE_LIFE_NOT_SET; 3298 else if (temp == COLD) 3299 return WRITE_LIFE_NONE; 3300 } else if (type == META) { 3301 return WRITE_LIFE_MEDIUM; 3302 } 3303 } 3304 return WRITE_LIFE_NOT_SET; 3305 } 3306 3307 static int __get_segment_type_2(struct f2fs_io_info *fio) 3308 { 3309 if (fio->type == DATA) 3310 return CURSEG_HOT_DATA; 3311 else 3312 return CURSEG_HOT_NODE; 3313 } 3314 3315 static int __get_segment_type_4(struct f2fs_io_info *fio) 3316 { 3317 if (fio->type == DATA) { 3318 struct inode *inode = fio->page->mapping->host; 3319 3320 if (S_ISDIR(inode->i_mode)) 3321 return CURSEG_HOT_DATA; 3322 else 3323 return CURSEG_COLD_DATA; 3324 } else { 3325 if (IS_DNODE(fio->page) && is_cold_node(fio->page)) 3326 return CURSEG_WARM_NODE; 3327 else 3328 return CURSEG_COLD_NODE; 3329 } 3330 } 3331 3332 static int __get_segment_type_6(struct f2fs_io_info *fio) 3333 { 3334 if (fio->type == DATA) { 3335 struct inode *inode = fio->page->mapping->host; 3336 3337 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) 3338 return CURSEG_COLD_DATA_PINNED; 3339 3340 if (page_private_gcing(fio->page)) { 3341 if (fio->sbi->am.atgc_enabled && 3342 (fio->io_type == FS_DATA_IO) && 3343 (fio->sbi->gc_mode != GC_URGENT_HIGH)) 3344 return CURSEG_ALL_DATA_ATGC; 3345 else 3346 return CURSEG_COLD_DATA; 3347 } 3348 if (file_is_cold(inode) || f2fs_need_compress_data(inode)) 3349 return CURSEG_COLD_DATA; 3350 if (file_is_hot(inode) || 3351 is_inode_flag_set(inode, FI_HOT_DATA) || 3352 f2fs_is_atomic_file(inode) || 3353 f2fs_is_volatile_file(inode)) 3354 return CURSEG_HOT_DATA; 3355 return f2fs_rw_hint_to_seg_type(inode->i_write_hint); 3356 } else { 3357 if (IS_DNODE(fio->page)) 3358 return is_cold_node(fio->page) ? 
CURSEG_WARM_NODE : 3359 CURSEG_HOT_NODE; 3360 return CURSEG_COLD_NODE; 3361 } 3362 } 3363 3364 static int __get_segment_type(struct f2fs_io_info *fio) 3365 { 3366 int type = 0; 3367 3368 switch (F2FS_OPTION(fio->sbi).active_logs) { 3369 case 2: 3370 type = __get_segment_type_2(fio); 3371 break; 3372 case 4: 3373 type = __get_segment_type_4(fio); 3374 break; 3375 case 6: 3376 type = __get_segment_type_6(fio); 3377 break; 3378 default: 3379 f2fs_bug_on(fio->sbi, true); 3380 } 3381 3382 if (IS_HOT(type)) 3383 fio->temp = HOT; 3384 else if (IS_WARM(type)) 3385 fio->temp = WARM; 3386 else 3387 fio->temp = COLD; 3388 return type; 3389 } 3390 3391 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, 3392 block_t old_blkaddr, block_t *new_blkaddr, 3393 struct f2fs_summary *sum, int type, 3394 struct f2fs_io_info *fio) 3395 { 3396 struct sit_info *sit_i = SIT_I(sbi); 3397 struct curseg_info *curseg = CURSEG_I(sbi, type); 3398 unsigned long long old_mtime; 3399 bool from_gc = (type == CURSEG_ALL_DATA_ATGC); 3400 struct seg_entry *se = NULL; 3401 3402 down_read(&SM_I(sbi)->curseg_lock); 3403 3404 mutex_lock(&curseg->curseg_mutex); 3405 down_write(&sit_i->sentry_lock); 3406 3407 if (from_gc) { 3408 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO); 3409 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr)); 3410 sanity_check_seg_type(sbi, se->type); 3411 f2fs_bug_on(sbi, IS_NODESEG(se->type)); 3412 } 3413 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 3414 3415 f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg); 3416 3417 f2fs_wait_discard_bio(sbi, *new_blkaddr); 3418 3419 /* 3420 * __add_sum_entry should be resided under the curseg_mutex 3421 * because, this function updates a summary entry in the 3422 * current summary block. 3423 */ 3424 __add_sum_entry(sbi, type, sum); 3425 3426 __refresh_next_blkoff(sbi, curseg); 3427 3428 stat_inc_block_count(sbi, curseg); 3429 3430 if (from_gc) { 3431 old_mtime = get_segment_mtime(sbi, old_blkaddr); 3432 } else { 3433 update_segment_mtime(sbi, old_blkaddr, 0); 3434 old_mtime = 0; 3435 } 3436 update_segment_mtime(sbi, *new_blkaddr, old_mtime); 3437 3438 /* 3439 * SIT information should be updated before segment allocation, 3440 * since SSR needs latest valid block information. 3441 */ 3442 update_sit_entry(sbi, *new_blkaddr, 1); 3443 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 3444 update_sit_entry(sbi, old_blkaddr, -1); 3445 3446 if (!__has_curseg_space(sbi, curseg)) { 3447 if (from_gc) 3448 get_atssr_segment(sbi, type, se->type, 3449 AT_SSR, se->mtime); 3450 else 3451 sit_i->s_ops->allocate_segment(sbi, type, false); 3452 } 3453 /* 3454 * segment dirty status should be updated after segment allocation, 3455 * so we just need to update status only one time after previous 3456 * segment being closed. 
3457 */ 3458 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 3459 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); 3460 3461 up_write(&sit_i->sentry_lock); 3462 3463 if (page && IS_NODESEG(type)) { 3464 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); 3465 3466 f2fs_inode_chksum_set(sbi, page); 3467 } 3468 3469 if (fio) { 3470 struct f2fs_bio_info *io; 3471 3472 if (F2FS_IO_ALIGNED(sbi)) 3473 fio->retry = false; 3474 3475 INIT_LIST_HEAD(&fio->list); 3476 fio->in_list = true; 3477 io = sbi->write_io[fio->type] + fio->temp; 3478 spin_lock(&io->io_lock); 3479 list_add_tail(&fio->list, &io->io_list); 3480 spin_unlock(&io->io_lock); 3481 } 3482 3483 mutex_unlock(&curseg->curseg_mutex); 3484 3485 up_read(&SM_I(sbi)->curseg_lock); 3486 } 3487 3488 static void update_device_state(struct f2fs_io_info *fio) 3489 { 3490 struct f2fs_sb_info *sbi = fio->sbi; 3491 unsigned int devidx; 3492 3493 if (!f2fs_is_multi_device(sbi)) 3494 return; 3495 3496 devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); 3497 3498 /* update device state for fsync */ 3499 f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); 3500 3501 /* update device state for checkpoint */ 3502 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { 3503 spin_lock(&sbi->dev_lock); 3504 f2fs_set_bit(devidx, (char *)&sbi->dirty_device); 3505 spin_unlock(&sbi->dev_lock); 3506 } 3507 } 3508 3509 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) 3510 { 3511 int type = __get_segment_type(fio); 3512 bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); 3513 3514 if (keep_order) 3515 down_read(&fio->sbi->io_order_lock); 3516 reallocate: 3517 f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, 3518 &fio->new_blkaddr, sum, type, fio); 3519 if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) { 3520 invalidate_mapping_pages(META_MAPPING(fio->sbi), 3521 fio->old_blkaddr, fio->old_blkaddr); 3522 f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr); 3523 } 3524 3525 /* writeout dirty page into bdev */ 3526 f2fs_submit_page_write(fio); 3527 if (fio->retry) { 3528 fio->old_blkaddr = fio->new_blkaddr; 3529 goto reallocate; 3530 } 3531 3532 update_device_state(fio); 3533 3534 if (keep_order) 3535 up_read(&fio->sbi->io_order_lock); 3536 } 3537 3538 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, 3539 enum iostat_type io_type) 3540 { 3541 struct f2fs_io_info fio = { 3542 .sbi = sbi, 3543 .type = META, 3544 .temp = HOT, 3545 .op = REQ_OP_WRITE, 3546 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, 3547 .old_blkaddr = page->index, 3548 .new_blkaddr = page->index, 3549 .page = page, 3550 .encrypted_page = NULL, 3551 .in_list = false, 3552 }; 3553 3554 if (unlikely(page->index >= MAIN_BLKADDR(sbi))) 3555 fio.op_flags &= ~REQ_META; 3556 3557 set_page_writeback(page); 3558 ClearPageError(page); 3559 f2fs_submit_page_write(&fio); 3560 3561 stat_inc_meta_count(sbi, page->index); 3562 f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); 3563 } 3564 3565 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) 3566 { 3567 struct f2fs_summary sum; 3568 3569 set_summary(&sum, nid, 0, 0); 3570 do_write_page(&sum, fio); 3571 3572 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); 3573 } 3574 3575 void f2fs_outplace_write_data(struct dnode_of_data *dn, 3576 struct f2fs_io_info *fio) 3577 { 3578 struct f2fs_sb_info *sbi = fio->sbi; 3579 struct f2fs_summary sum; 3580 3581 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); 3582 
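/*
 * Illustrative sketch only: what the set_summary() call just below records.
 * A data block's summary points back at its dnode (nid plus the block's
 * offset within that node) together with the node version passed in
 * fio->version, while f2fs_do_write_node_page() above records only the nid.
 * The struct here is a simplified userspace stand-in, not the on-disk
 * struct f2fs_summary.
 */
#if 0	/* standalone illustration, not kernel code */
#include <stdio.h>
#include <stdint.h>

struct demo_summary {
	uint32_t nid;		/* owning node id */
	uint16_t ofs_in_node;	/* block's slot inside that node */
	uint8_t  version;	/* node version passed by the writer */
};

static struct demo_summary demo_set_summary(uint32_t nid, uint16_t ofs,
					    uint8_t version)
{
	struct demo_summary sum = { .nid = nid, .ofs_in_node = ofs,
				    .version = version };
	return sum;
}

int main(void)
{
	struct demo_summary node = demo_set_summary(1234, 0, 0);  /* node page */
	struct demo_summary data = demo_set_summary(1234, 57, 3); /* data blk  */

	printf("node: nid=%u  data: nid=%u ofs=%u ver=%u\n",
	       node.nid, data.nid, data.ofs_in_node, data.version);
	return 0;
}
#endif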
set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); 3583 do_write_page(&sum, fio); 3584 f2fs_update_data_blkaddr(dn, fio->new_blkaddr); 3585 3586 f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); 3587 } 3588 3589 int f2fs_inplace_write_data(struct f2fs_io_info *fio) 3590 { 3591 int err; 3592 struct f2fs_sb_info *sbi = fio->sbi; 3593 unsigned int segno; 3594 3595 fio->new_blkaddr = fio->old_blkaddr; 3596 /* i/o temperature is needed for passing down write hints */ 3597 __get_segment_type(fio); 3598 3599 segno = GET_SEGNO(sbi, fio->new_blkaddr); 3600 3601 if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { 3602 set_sbi_flag(sbi, SBI_NEED_FSCK); 3603 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", 3604 __func__, segno); 3605 err = -EFSCORRUPTED; 3606 goto drop_bio; 3607 } 3608 3609 if (f2fs_cp_error(sbi)) { 3610 err = -EIO; 3611 goto drop_bio; 3612 } 3613 3614 stat_inc_inplace_blocks(fio->sbi); 3615 3616 if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE))) 3617 err = f2fs_merge_page_bio(fio); 3618 else 3619 err = f2fs_submit_page_bio(fio); 3620 if (!err) { 3621 update_device_state(fio); 3622 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); 3623 } 3624 3625 return err; 3626 drop_bio: 3627 if (fio->bio && *(fio->bio)) { 3628 struct bio *bio = *(fio->bio); 3629 3630 bio->bi_status = BLK_STS_IOERR; 3631 bio_endio(bio); 3632 *(fio->bio) = NULL; 3633 } 3634 return err; 3635 } 3636 3637 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, 3638 unsigned int segno) 3639 { 3640 int i; 3641 3642 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { 3643 if (CURSEG_I(sbi, i)->segno == segno) 3644 break; 3645 } 3646 return i; 3647 } 3648 3649 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 3650 block_t old_blkaddr, block_t new_blkaddr, 3651 bool recover_curseg, bool recover_newaddr, 3652 bool from_gc) 3653 { 3654 struct sit_info *sit_i = SIT_I(sbi); 3655 struct curseg_info *curseg; 3656 unsigned int segno, old_cursegno; 3657 struct seg_entry *se; 3658 int type; 3659 unsigned short old_blkoff; 3660 unsigned char old_alloc_type; 3661 3662 segno = GET_SEGNO(sbi, new_blkaddr); 3663 se = get_seg_entry(sbi, segno); 3664 type = se->type; 3665 3666 down_write(&SM_I(sbi)->curseg_lock); 3667 3668 if (!recover_curseg) { 3669 /* for recovery flow */ 3670 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { 3671 if (old_blkaddr == NULL_ADDR) 3672 type = CURSEG_COLD_DATA; 3673 else 3674 type = CURSEG_WARM_DATA; 3675 } 3676 } else { 3677 if (IS_CURSEG(sbi, segno)) { 3678 /* se->type is volatile as SSR allocation */ 3679 type = __f2fs_get_curseg(sbi, segno); 3680 f2fs_bug_on(sbi, type == NO_CHECK_TYPE); 3681 } else { 3682 type = CURSEG_WARM_DATA; 3683 } 3684 } 3685 3686 f2fs_bug_on(sbi, !IS_DATASEG(type)); 3687 curseg = CURSEG_I(sbi, type); 3688 3689 mutex_lock(&curseg->curseg_mutex); 3690 down_write(&sit_i->sentry_lock); 3691 3692 old_cursegno = curseg->segno; 3693 old_blkoff = curseg->next_blkoff; 3694 old_alloc_type = curseg->alloc_type; 3695 3696 /* change the current segment */ 3697 if (segno != curseg->segno) { 3698 curseg->next_segno = segno; 3699 change_curseg(sbi, type, true); 3700 } 3701 3702 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); 3703 __add_sum_entry(sbi, type, sum); 3704 3705 if (!recover_curseg || recover_newaddr) { 3706 if (!from_gc) 3707 update_segment_mtime(sbi, new_blkaddr, 0); 3708 update_sit_entry(sbi, new_blkaddr, 1); 3709 } 3710 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { 3711 
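/*
 * Illustrative sketch only: update_segment_mtime(), called a few lines below
 * for the old block address, keeps se->mtime as a running average over the
 * blocks accumulated in the segment:
 *	mtime' = (mtime * valid_blocks + new_mtime) / (valid_blocks + 1)
 * The userspace demo below simply replays that recurrence.
 */
#if 0	/* standalone illustration, not kernel code */
#include <stdio.h>

int main(void)
{
	unsigned long long mtime = 0;	/* segment not written yet */
	unsigned long long valid_blocks = 0;
	unsigned long long samples[] = { 100, 200, 600 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		if (!mtime)
			mtime = samples[i];	/* first block seeds the average */
		else
			mtime = (mtime * valid_blocks + samples[i]) /
				(valid_blocks + 1);
		valid_blocks++;
		printf("after block %u: mtime=%llu\n", i, mtime);
	}
	return 0;
}
#endif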
invalidate_mapping_pages(META_MAPPING(sbi), 3712 old_blkaddr, old_blkaddr); 3713 f2fs_invalidate_compress_page(sbi, old_blkaddr); 3714 if (!from_gc) 3715 update_segment_mtime(sbi, old_blkaddr, 0); 3716 update_sit_entry(sbi, old_blkaddr, -1); 3717 } 3718 3719 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 3720 locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); 3721 3722 locate_dirty_segment(sbi, old_cursegno); 3723 3724 if (recover_curseg) { 3725 if (old_cursegno != curseg->segno) { 3726 curseg->next_segno = old_cursegno; 3727 change_curseg(sbi, type, true); 3728 } 3729 curseg->next_blkoff = old_blkoff; 3730 curseg->alloc_type = old_alloc_type; 3731 } 3732 3733 up_write(&sit_i->sentry_lock); 3734 mutex_unlock(&curseg->curseg_mutex); 3735 up_write(&SM_I(sbi)->curseg_lock); 3736 } 3737 3738 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, 3739 block_t old_addr, block_t new_addr, 3740 unsigned char version, bool recover_curseg, 3741 bool recover_newaddr) 3742 { 3743 struct f2fs_summary sum; 3744 3745 set_summary(&sum, dn->nid, dn->ofs_in_node, version); 3746 3747 f2fs_do_replace_block(sbi, &sum, old_addr, new_addr, 3748 recover_curseg, recover_newaddr, false); 3749 3750 f2fs_update_data_blkaddr(dn, new_addr); 3751 } 3752 3753 void f2fs_wait_on_page_writeback(struct page *page, 3754 enum page_type type, bool ordered, bool locked) 3755 { 3756 if (PageWriteback(page)) { 3757 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 3758 3759 /* submit cached LFS IO */ 3760 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); 3761 /* sbumit cached IPU IO */ 3762 f2fs_submit_merged_ipu_write(sbi, NULL, page); 3763 if (ordered) { 3764 wait_on_page_writeback(page); 3765 f2fs_bug_on(sbi, locked && PageWriteback(page)); 3766 } else { 3767 wait_for_stable_page(page); 3768 } 3769 } 3770 } 3771 3772 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) 3773 { 3774 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3775 struct page *cpage; 3776 3777 if (!f2fs_post_read_required(inode)) 3778 return; 3779 3780 if (!__is_valid_data_blkaddr(blkaddr)) 3781 return; 3782 3783 cpage = find_lock_page(META_MAPPING(sbi), blkaddr); 3784 if (cpage) { 3785 f2fs_wait_on_page_writeback(cpage, DATA, true, true); 3786 f2fs_put_page(cpage, 1); 3787 } 3788 } 3789 3790 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, 3791 block_t len) 3792 { 3793 block_t i; 3794 3795 for (i = 0; i < len; i++) 3796 f2fs_wait_on_block_writeback(inode, blkaddr + i); 3797 } 3798 3799 static int read_compacted_summaries(struct f2fs_sb_info *sbi) 3800 { 3801 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 3802 struct curseg_info *seg_i; 3803 unsigned char *kaddr; 3804 struct page *page; 3805 block_t start; 3806 int i, j, offset; 3807 3808 start = start_sum_block(sbi); 3809 3810 page = f2fs_get_meta_page(sbi, start++); 3811 if (IS_ERR(page)) 3812 return PTR_ERR(page); 3813 kaddr = (unsigned char *)page_address(page); 3814 3815 /* Step 1: restore nat cache */ 3816 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); 3817 memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE); 3818 3819 /* Step 2: restore sit cache */ 3820 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); 3821 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE); 3822 offset = 2 * SUM_JOURNAL_SIZE; 3823 3824 /* Step 3: restore summary entries */ 3825 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 3826 unsigned short blk_off; 3827 unsigned int segno; 3828 3829 seg_i = CURSEG_I(sbi, i); 3830 segno = 
le32_to_cpu(ckpt->cur_data_segno[i]); 3831 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); 3832 seg_i->next_segno = segno; 3833 reset_curseg(sbi, i, 0); 3834 seg_i->alloc_type = ckpt->alloc_type[i]; 3835 seg_i->next_blkoff = blk_off; 3836 3837 if (seg_i->alloc_type == SSR) 3838 blk_off = sbi->blocks_per_seg; 3839 3840 for (j = 0; j < blk_off; j++) { 3841 struct f2fs_summary *s; 3842 3843 s = (struct f2fs_summary *)(kaddr + offset); 3844 seg_i->sum_blk->entries[j] = *s; 3845 offset += SUMMARY_SIZE; 3846 if (offset + SUMMARY_SIZE <= PAGE_SIZE - 3847 SUM_FOOTER_SIZE) 3848 continue; 3849 3850 f2fs_put_page(page, 1); 3851 page = NULL; 3852 3853 page = f2fs_get_meta_page(sbi, start++); 3854 if (IS_ERR(page)) 3855 return PTR_ERR(page); 3856 kaddr = (unsigned char *)page_address(page); 3857 offset = 0; 3858 } 3859 } 3860 f2fs_put_page(page, 1); 3861 return 0; 3862 } 3863 3864 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) 3865 { 3866 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 3867 struct f2fs_summary_block *sum; 3868 struct curseg_info *curseg; 3869 struct page *new; 3870 unsigned short blk_off; 3871 unsigned int segno = 0; 3872 block_t blk_addr = 0; 3873 int err = 0; 3874 3875 /* get segment number and block addr */ 3876 if (IS_DATASEG(type)) { 3877 segno = le32_to_cpu(ckpt->cur_data_segno[type]); 3878 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - 3879 CURSEG_HOT_DATA]); 3880 if (__exist_node_summaries(sbi)) 3881 blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type); 3882 else 3883 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); 3884 } else { 3885 segno = le32_to_cpu(ckpt->cur_node_segno[type - 3886 CURSEG_HOT_NODE]); 3887 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - 3888 CURSEG_HOT_NODE]); 3889 if (__exist_node_summaries(sbi)) 3890 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, 3891 type - CURSEG_HOT_NODE); 3892 else 3893 blk_addr = GET_SUM_BLOCK(sbi, segno); 3894 } 3895 3896 new = f2fs_get_meta_page(sbi, blk_addr); 3897 if (IS_ERR(new)) 3898 return PTR_ERR(new); 3899 sum = (struct f2fs_summary_block *)page_address(new); 3900 3901 if (IS_NODESEG(type)) { 3902 if (__exist_node_summaries(sbi)) { 3903 struct f2fs_summary *ns = &sum->entries[0]; 3904 int i; 3905 3906 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { 3907 ns->version = 0; 3908 ns->ofs_in_node = 0; 3909 } 3910 } else { 3911 err = f2fs_restore_node_summary(sbi, segno, sum); 3912 if (err) 3913 goto out; 3914 } 3915 } 3916 3917 /* set uncompleted segment to curseg */ 3918 curseg = CURSEG_I(sbi, type); 3919 mutex_lock(&curseg->curseg_mutex); 3920 3921 /* update journal info */ 3922 down_write(&curseg->journal_rwsem); 3923 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE); 3924 up_write(&curseg->journal_rwsem); 3925 3926 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE); 3927 memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE); 3928 curseg->next_segno = segno; 3929 reset_curseg(sbi, type, 0); 3930 curseg->alloc_type = ckpt->alloc_type[type]; 3931 curseg->next_blkoff = blk_off; 3932 mutex_unlock(&curseg->curseg_mutex); 3933 out: 3934 f2fs_put_page(new, 1); 3935 return err; 3936 } 3937 3938 static int restore_curseg_summaries(struct f2fs_sb_info *sbi) 3939 { 3940 struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal; 3941 struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal; 3942 int type = CURSEG_HOT_DATA; 3943 int err; 3944 3945 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { 3946 int npages = 
f2fs_npages_for_summary_flush(sbi, true); 3947 3948 if (npages >= 2) 3949 f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages, 3950 META_CP, true); 3951 3952 /* restore for compacted data summary */ 3953 err = read_compacted_summaries(sbi); 3954 if (err) 3955 return err; 3956 type = CURSEG_HOT_NODE; 3957 } 3958 3959 if (__exist_node_summaries(sbi)) 3960 f2fs_ra_meta_pages(sbi, 3961 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type), 3962 NR_CURSEG_PERSIST_TYPE - type, META_CP, true); 3963 3964 for (; type <= CURSEG_COLD_NODE; type++) { 3965 err = read_normal_summaries(sbi, type); 3966 if (err) 3967 return err; 3968 } 3969 3970 /* sanity check for summary blocks */ 3971 if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || 3972 sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { 3973 f2fs_err(sbi, "invalid journal entries nats %u sits %u", 3974 nats_in_cursum(nat_j), sits_in_cursum(sit_j)); 3975 return -EINVAL; 3976 } 3977 3978 return 0; 3979 } 3980 3981 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) 3982 { 3983 struct page *page; 3984 unsigned char *kaddr; 3985 struct f2fs_summary *summary; 3986 struct curseg_info *seg_i; 3987 int written_size = 0; 3988 int i, j; 3989 3990 page = f2fs_grab_meta_page(sbi, blkaddr++); 3991 kaddr = (unsigned char *)page_address(page); 3992 memset(kaddr, 0, PAGE_SIZE); 3993 3994 /* Step 1: write nat cache */ 3995 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); 3996 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE); 3997 written_size += SUM_JOURNAL_SIZE; 3998 3999 /* Step 2: write sit cache */ 4000 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); 4001 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE); 4002 written_size += SUM_JOURNAL_SIZE; 4003 4004 /* Step 3: write summary entries */ 4005 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 4006 unsigned short blkoff; 4007 4008 seg_i = CURSEG_I(sbi, i); 4009 if (sbi->ckpt->alloc_type[i] == SSR) 4010 blkoff = sbi->blocks_per_seg; 4011 else 4012 blkoff = curseg_blkoff(sbi, i); 4013 4014 for (j = 0; j < blkoff; j++) { 4015 if (!page) { 4016 page = f2fs_grab_meta_page(sbi, blkaddr++); 4017 kaddr = (unsigned char *)page_address(page); 4018 memset(kaddr, 0, PAGE_SIZE); 4019 written_size = 0; 4020 } 4021 summary = (struct f2fs_summary *)(kaddr + written_size); 4022 *summary = seg_i->sum_blk->entries[j]; 4023 written_size += SUMMARY_SIZE; 4024 4025 if (written_size + SUMMARY_SIZE <= PAGE_SIZE - 4026 SUM_FOOTER_SIZE) 4027 continue; 4028 4029 set_page_dirty(page); 4030 f2fs_put_page(page, 1); 4031 page = NULL; 4032 } 4033 } 4034 if (page) { 4035 set_page_dirty(page); 4036 f2fs_put_page(page, 1); 4037 } 4038 } 4039 4040 static void write_normal_summaries(struct f2fs_sb_info *sbi, 4041 block_t blkaddr, int type) 4042 { 4043 int i, end; 4044 4045 if (IS_DATASEG(type)) 4046 end = type + NR_CURSEG_DATA_TYPE; 4047 else 4048 end = type + NR_CURSEG_NODE_TYPE; 4049 4050 for (i = type; i < end; i++) 4051 write_current_sum_page(sbi, i, blkaddr + (i - type)); 4052 } 4053 4054 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 4055 { 4056 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) 4057 write_compacted_summaries(sbi, start_blk); 4058 else 4059 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); 4060 } 4061 4062 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 4063 { 4064 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); 4065 } 4066 4067 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, 4068 unsigned int val, int alloc) 
4069 { 4070 int i; 4071 4072 if (type == NAT_JOURNAL) { 4073 for (i = 0; i < nats_in_cursum(journal); i++) { 4074 if (le32_to_cpu(nid_in_journal(journal, i)) == val) 4075 return i; 4076 } 4077 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL)) 4078 return update_nats_in_cursum(journal, 1); 4079 } else if (type == SIT_JOURNAL) { 4080 for (i = 0; i < sits_in_cursum(journal); i++) 4081 if (le32_to_cpu(segno_in_journal(journal, i)) == val) 4082 return i; 4083 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL)) 4084 return update_sits_in_cursum(journal, 1); 4085 } 4086 return -1; 4087 } 4088 4089 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, 4090 unsigned int segno) 4091 { 4092 return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno)); 4093 } 4094 4095 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, 4096 unsigned int start) 4097 { 4098 struct sit_info *sit_i = SIT_I(sbi); 4099 struct page *page; 4100 pgoff_t src_off, dst_off; 4101 4102 src_off = current_sit_addr(sbi, start); 4103 dst_off = next_sit_addr(sbi, src_off); 4104 4105 page = f2fs_grab_meta_page(sbi, dst_off); 4106 seg_info_to_sit_page(sbi, page, start); 4107 4108 set_page_dirty(page); 4109 set_to_next_sit(sit_i, start); 4110 4111 return page; 4112 } 4113 4114 static struct sit_entry_set *grab_sit_entry_set(void) 4115 { 4116 struct sit_entry_set *ses = 4117 f2fs_kmem_cache_alloc(sit_entry_set_slab, 4118 GFP_NOFS, true, NULL); 4119 4120 ses->entry_cnt = 0; 4121 INIT_LIST_HEAD(&ses->set_list); 4122 return ses; 4123 } 4124 4125 static void release_sit_entry_set(struct sit_entry_set *ses) 4126 { 4127 list_del(&ses->set_list); 4128 kmem_cache_free(sit_entry_set_slab, ses); 4129 } 4130 4131 static void adjust_sit_entry_set(struct sit_entry_set *ses, 4132 struct list_head *head) 4133 { 4134 struct sit_entry_set *next = ses; 4135 4136 if (list_is_last(&ses->set_list, head)) 4137 return; 4138 4139 list_for_each_entry_continue(next, head, set_list) 4140 if (ses->entry_cnt <= next->entry_cnt) 4141 break; 4142 4143 list_move_tail(&ses->set_list, &next->set_list); 4144 } 4145 4146 static void add_sit_entry(unsigned int segno, struct list_head *head) 4147 { 4148 struct sit_entry_set *ses; 4149 unsigned int start_segno = START_SEGNO(segno); 4150 4151 list_for_each_entry(ses, head, set_list) { 4152 if (ses->start_segno == start_segno) { 4153 ses->entry_cnt++; 4154 adjust_sit_entry_set(ses, head); 4155 return; 4156 } 4157 } 4158 4159 ses = grab_sit_entry_set(); 4160 4161 ses->start_segno = start_segno; 4162 ses->entry_cnt++; 4163 list_add(&ses->set_list, head); 4164 } 4165 4166 static void add_sits_in_set(struct f2fs_sb_info *sbi) 4167 { 4168 struct f2fs_sm_info *sm_info = SM_I(sbi); 4169 struct list_head *set_list = &sm_info->sit_entry_set; 4170 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; 4171 unsigned int segno; 4172 4173 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) 4174 add_sit_entry(segno, set_list); 4175 } 4176 4177 static void remove_sits_in_journal(struct f2fs_sb_info *sbi) 4178 { 4179 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 4180 struct f2fs_journal *journal = curseg->journal; 4181 int i; 4182 4183 down_write(&curseg->journal_rwsem); 4184 for (i = 0; i < sits_in_cursum(journal); i++) { 4185 unsigned int segno; 4186 bool dirtied; 4187 4188 segno = le32_to_cpu(segno_in_journal(journal, i)); 4189 dirtied = __mark_sit_entry_dirty(sbi, segno); 4190 4191 if (!dirtied) 4192 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); 4193 } 4194 
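	/*
	 * Every SIT entry that was cached in the journal is now marked
	 * dirty (and, if it was not already, added to the sit_entry_set
	 * accounting above), so the journal copies are redundant; the
	 * negative count below resets the in-journal SIT entry count to
	 * zero.
	 */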
update_sits_in_cursum(journal, -i); 4195 up_write(&curseg->journal_rwsem); 4196 } 4197 4198 /* 4199 * CP calls this function, which flushes SIT entries including sit_journal, 4200 * and moves prefree segs to free segs. 4201 */ 4202 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) 4203 { 4204 struct sit_info *sit_i = SIT_I(sbi); 4205 unsigned long *bitmap = sit_i->dirty_sentries_bitmap; 4206 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 4207 struct f2fs_journal *journal = curseg->journal; 4208 struct sit_entry_set *ses, *tmp; 4209 struct list_head *head = &SM_I(sbi)->sit_entry_set; 4210 bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS); 4211 struct seg_entry *se; 4212 4213 down_write(&sit_i->sentry_lock); 4214 4215 if (!sit_i->dirty_sentries) 4216 goto out; 4217 4218 /* 4219 * add and account sit entries of the dirty bitmap in the sit entry 4220 * set temporarily 4221 */ 4222 add_sits_in_set(sbi); 4223 4224 /* 4225 * if there is not enough space in the journal to store dirty sit 4226 * entries, remove all entries from the journal and add and account 4227 * them in the sit entry set. 4228 */ 4229 if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) || 4230 !to_journal) 4231 remove_sits_in_journal(sbi); 4232 4233 /* 4234 * there are two steps to flush sit entries: 4235 * #1, flush sit entries to journal in current cold data summary block. 4236 * #2, flush sit entries to sit page. 4237 */ 4238 list_for_each_entry_safe(ses, tmp, head, set_list) { 4239 struct page *page = NULL; 4240 struct f2fs_sit_block *raw_sit = NULL; 4241 unsigned int start_segno = ses->start_segno; 4242 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, 4243 (unsigned long)MAIN_SEGS(sbi)); 4244 unsigned int segno = start_segno; 4245 4246 if (to_journal && 4247 !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL)) 4248 to_journal = false; 4249 4250 if (to_journal) { 4251 down_write(&curseg->journal_rwsem); 4252 } else { 4253 page = get_next_sit_page(sbi, start_segno); 4254 raw_sit = page_address(page); 4255 } 4256 4257 /* flush dirty sit entries in region of current sit set */ 4258 for_each_set_bit_from(segno, bitmap, end) { 4259 int offset, sit_offset; 4260 4261 se = get_seg_entry(sbi, segno); 4262 #ifdef CONFIG_F2FS_CHECK_FS 4263 if (memcmp(se->cur_valid_map, se->cur_valid_map_mir, 4264 SIT_VBLOCK_MAP_SIZE)) 4265 f2fs_bug_on(sbi, 1); 4266 #endif 4267 4268 /* add discard candidates */ 4269 if (!(cpc->reason & CP_DISCARD)) { 4270 cpc->trim_start = segno; 4271 add_discard_addrs(sbi, cpc, false); 4272 } 4273 4274 if (to_journal) { 4275 offset = f2fs_lookup_journal_in_cursum(journal, 4276 SIT_JOURNAL, segno, 1); 4277 f2fs_bug_on(sbi, offset < 0); 4278 segno_in_journal(journal, offset) = 4279 cpu_to_le32(segno); 4280 seg_info_to_raw_sit(se, 4281 &sit_in_journal(journal, offset)); 4282 check_block_count(sbi, segno, 4283 &sit_in_journal(journal, offset)); 4284 } else { 4285 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); 4286 seg_info_to_raw_sit(se, 4287 &raw_sit->entries[sit_offset]); 4288 check_block_count(sbi, segno, 4289 &raw_sit->entries[sit_offset]); 4290 } 4291 4292 __clear_bit(segno, bitmap); 4293 sit_i->dirty_sentries--; 4294 ses->entry_cnt--; 4295 } 4296 4297 if (to_journal) 4298 up_write(&curseg->journal_rwsem); 4299 else 4300 f2fs_put_page(page, 1); 4301 4302 f2fs_bug_on(sbi, ses->entry_cnt); 4303 release_sit_entry_set(ses); 4304 } 4305 4306 f2fs_bug_on(sbi, !list_empty(head)); 4307 f2fs_bug_on(sbi, sit_i->dirty_sentries); 4308 out: 4309 if (cpc->reason &
CP_DISCARD) { 4310 __u64 trim_start = cpc->trim_start; 4311 4312 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) 4313 add_discard_addrs(sbi, cpc, false); 4314 4315 cpc->trim_start = trim_start; 4316 } 4317 up_write(&sit_i->sentry_lock); 4318 4319 set_prefree_as_free_segments(sbi); 4320 } 4321 4322 static int build_sit_info(struct f2fs_sb_info *sbi) 4323 { 4324 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 4325 struct sit_info *sit_i; 4326 unsigned int sit_segs, start; 4327 char *src_bitmap, *bitmap; 4328 unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size; 4329 unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0; 4330 4331 /* allocate memory for SIT information */ 4332 sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL); 4333 if (!sit_i) 4334 return -ENOMEM; 4335 4336 SM_I(sbi)->sit_info = sit_i; 4337 4338 sit_i->sentries = 4339 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry), 4340 MAIN_SEGS(sbi)), 4341 GFP_KERNEL); 4342 if (!sit_i->sentries) 4343 return -ENOMEM; 4344 4345 main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 4346 sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size, 4347 GFP_KERNEL); 4348 if (!sit_i->dirty_sentries_bitmap) 4349 return -ENOMEM; 4350 4351 #ifdef CONFIG_F2FS_CHECK_FS 4352 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map); 4353 #else 4354 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map); 4355 #endif 4356 sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); 4357 if (!sit_i->bitmap) 4358 return -ENOMEM; 4359 4360 bitmap = sit_i->bitmap; 4361 4362 for (start = 0; start < MAIN_SEGS(sbi); start++) { 4363 sit_i->sentries[start].cur_valid_map = bitmap; 4364 bitmap += SIT_VBLOCK_MAP_SIZE; 4365 4366 sit_i->sentries[start].ckpt_valid_map = bitmap; 4367 bitmap += SIT_VBLOCK_MAP_SIZE; 4368 4369 #ifdef CONFIG_F2FS_CHECK_FS 4370 sit_i->sentries[start].cur_valid_map_mir = bitmap; 4371 bitmap += SIT_VBLOCK_MAP_SIZE; 4372 #endif 4373 4374 if (discard_map) { 4375 sit_i->sentries[start].discard_map = bitmap; 4376 bitmap += SIT_VBLOCK_MAP_SIZE; 4377 } 4378 } 4379 4380 sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); 4381 if (!sit_i->tmp_map) 4382 return -ENOMEM; 4383 4384 if (__is_large_section(sbi)) { 4385 sit_i->sec_entries = 4386 f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry), 4387 MAIN_SECS(sbi)), 4388 GFP_KERNEL); 4389 if (!sit_i->sec_entries) 4390 return -ENOMEM; 4391 } 4392 4393 /* get information related to SIT */ 4394 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; 4395 4396 /* set up SIT bitmap from checkpoint pack */ 4397 sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP); 4398 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); 4399 4400 sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL); 4401 if (!sit_i->sit_bitmap) 4402 return -ENOMEM; 4403 4404 #ifdef CONFIG_F2FS_CHECK_FS 4405 sit_i->sit_bitmap_mir = kmemdup(src_bitmap, 4406 sit_bitmap_size, GFP_KERNEL); 4407 if (!sit_i->sit_bitmap_mir) 4408 return -ENOMEM; 4409 4410 sit_i->invalid_segmap = f2fs_kvzalloc(sbi, 4411 main_bitmap_size, GFP_KERNEL); 4412 if (!sit_i->invalid_segmap) 4413 return -ENOMEM; 4414 #endif 4415 4416 /* init SIT information */ 4417 sit_i->s_ops = &default_salloc_ops; 4418 4419 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); 4420 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; 4421 sit_i->written_valid_blocks = 0; 4422 sit_i->bitmap_size = sit_bitmap_size; 4423 sit_i->dirty_sentries = 0; 4424
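	/*
	 * The time fields initialised below feed per-segment mtime
	 * accounting (see get_mtime() and init_min_max_mtime()), which the
	 * cost-benefit GC victim policy later consumes.
	 */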
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; 4425 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); 4426 sit_i->mounted_time = ktime_get_boottime_seconds(); 4427 init_rwsem(&sit_i->sentry_lock); 4428 return 0; 4429 } 4430 4431 static int build_free_segmap(struct f2fs_sb_info *sbi) 4432 { 4433 struct free_segmap_info *free_i; 4434 unsigned int bitmap_size, sec_bitmap_size; 4435 4436 /* allocate memory for free segmap information */ 4437 free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL); 4438 if (!free_i) 4439 return -ENOMEM; 4440 4441 SM_I(sbi)->free_info = free_i; 4442 4443 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 4444 free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL); 4445 if (!free_i->free_segmap) 4446 return -ENOMEM; 4447 4448 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4449 free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL); 4450 if (!free_i->free_secmap) 4451 return -ENOMEM; 4452 4453 /* set all segments as dirty temporarily */ 4454 memset(free_i->free_segmap, 0xff, bitmap_size); 4455 memset(free_i->free_secmap, 0xff, sec_bitmap_size); 4456 4457 /* init free segmap information */ 4458 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); 4459 free_i->free_segments = 0; 4460 free_i->free_sections = 0; 4461 spin_lock_init(&free_i->segmap_lock); 4462 return 0; 4463 } 4464 4465 static int build_curseg(struct f2fs_sb_info *sbi) 4466 { 4467 struct curseg_info *array; 4468 int i; 4469 4470 array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, 4471 sizeof(*array)), GFP_KERNEL); 4472 if (!array) 4473 return -ENOMEM; 4474 4475 SM_I(sbi)->curseg_array = array; 4476 4477 for (i = 0; i < NO_CHECK_TYPE; i++) { 4478 mutex_init(&array[i].curseg_mutex); 4479 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL); 4480 if (!array[i].sum_blk) 4481 return -ENOMEM; 4482 init_rwsem(&array[i].journal_rwsem); 4483 array[i].journal = f2fs_kzalloc(sbi, 4484 sizeof(struct f2fs_journal), GFP_KERNEL); 4485 if (!array[i].journal) 4486 return -ENOMEM; 4487 if (i < NR_PERSISTENT_LOG) 4488 array[i].seg_type = CURSEG_HOT_DATA + i; 4489 else if (i == CURSEG_COLD_DATA_PINNED) 4490 array[i].seg_type = CURSEG_COLD_DATA; 4491 else if (i == CURSEG_ALL_DATA_ATGC) 4492 array[i].seg_type = CURSEG_COLD_DATA; 4493 array[i].segno = NULL_SEGNO; 4494 array[i].next_blkoff = 0; 4495 array[i].inited = false; 4496 } 4497 return restore_curseg_summaries(sbi); 4498 } 4499 4500 static int build_sit_entries(struct f2fs_sb_info *sbi) 4501 { 4502 struct sit_info *sit_i = SIT_I(sbi); 4503 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 4504 struct f2fs_journal *journal = curseg->journal; 4505 struct seg_entry *se; 4506 struct f2fs_sit_entry sit; 4507 int sit_blk_cnt = SIT_BLK_CNT(sbi); 4508 unsigned int i, start, end; 4509 unsigned int readed, start_blk = 0; 4510 int err = 0; 4511 block_t total_node_blocks = 0; 4512 4513 do { 4514 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS, 4515 META_SIT, true); 4516 4517 start = start_blk * sit_i->sents_per_block; 4518 end = (start_blk + readed) * sit_i->sents_per_block; 4519 4520 for (; start < end && start < MAIN_SEGS(sbi); start++) { 4521 struct f2fs_sit_block *sit_blk; 4522 struct page *page; 4523 4524 se = &sit_i->sentries[start]; 4525 page = get_current_sit_page(sbi, start); 4526 if (IS_ERR(page)) 4527 return PTR_ERR(page); 4528 sit_blk = (struct f2fs_sit_block *)page_address(page); 4529 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; 4530 f2fs_put_page(page, 1); 4531 
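			/*
			 * The raw SIT entry was copied into 'sit' and the meta
			 * page reference dropped above; the checks below work
			 * on the local copy only.
			 */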
4532 err = check_block_count(sbi, start, &sit); 4533 if (err) 4534 return err; 4535 seg_info_from_raw_sit(se, &sit); 4536 if (IS_NODESEG(se->type)) 4537 total_node_blocks += se->valid_blocks; 4538 4539 if (f2fs_block_unit_discard(sbi)) { 4540 /* build discard map only one time */ 4541 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 4542 memset(se->discard_map, 0xff, 4543 SIT_VBLOCK_MAP_SIZE); 4544 } else { 4545 memcpy(se->discard_map, 4546 se->cur_valid_map, 4547 SIT_VBLOCK_MAP_SIZE); 4548 sbi->discard_blks += 4549 sbi->blocks_per_seg - 4550 se->valid_blocks; 4551 } 4552 } 4553 4554 if (__is_large_section(sbi)) 4555 get_sec_entry(sbi, start)->valid_blocks += 4556 se->valid_blocks; 4557 } 4558 start_blk += readed; 4559 } while (start_blk < sit_blk_cnt); 4560 4561 down_read(&curseg->journal_rwsem); 4562 for (i = 0; i < sits_in_cursum(journal); i++) { 4563 unsigned int old_valid_blocks; 4564 4565 start = le32_to_cpu(segno_in_journal(journal, i)); 4566 if (start >= MAIN_SEGS(sbi)) { 4567 f2fs_err(sbi, "Wrong journal entry on segno %u", 4568 start); 4569 err = -EFSCORRUPTED; 4570 break; 4571 } 4572 4573 se = &sit_i->sentries[start]; 4574 sit = sit_in_journal(journal, i); 4575 4576 old_valid_blocks = se->valid_blocks; 4577 if (IS_NODESEG(se->type)) 4578 total_node_blocks -= old_valid_blocks; 4579 4580 err = check_block_count(sbi, start, &sit); 4581 if (err) 4582 break; 4583 seg_info_from_raw_sit(se, &sit); 4584 if (IS_NODESEG(se->type)) 4585 total_node_blocks += se->valid_blocks; 4586 4587 if (f2fs_block_unit_discard(sbi)) { 4588 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 4589 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); 4590 } else { 4591 memcpy(se->discard_map, se->cur_valid_map, 4592 SIT_VBLOCK_MAP_SIZE); 4593 sbi->discard_blks += old_valid_blocks; 4594 sbi->discard_blks -= se->valid_blocks; 4595 } 4596 } 4597 4598 if (__is_large_section(sbi)) { 4599 get_sec_entry(sbi, start)->valid_blocks += 4600 se->valid_blocks; 4601 get_sec_entry(sbi, start)->valid_blocks -= 4602 old_valid_blocks; 4603 } 4604 } 4605 up_read(&curseg->journal_rwsem); 4606 4607 if (!err && total_node_blocks != valid_node_count(sbi)) { 4608 f2fs_err(sbi, "SIT is corrupted node# %u vs %u", 4609 total_node_blocks, valid_node_count(sbi)); 4610 err = -EFSCORRUPTED; 4611 } 4612 4613 return err; 4614 } 4615 4616 static void init_free_segmap(struct f2fs_sb_info *sbi) 4617 { 4618 unsigned int start; 4619 int type; 4620 struct seg_entry *sentry; 4621 4622 for (start = 0; start < MAIN_SEGS(sbi); start++) { 4623 if (f2fs_usable_blks_in_seg(sbi, start) == 0) 4624 continue; 4625 sentry = get_seg_entry(sbi, start); 4626 if (!sentry->valid_blocks) 4627 __set_free(sbi, start); 4628 else 4629 SIT_I(sbi)->written_valid_blocks += 4630 sentry->valid_blocks; 4631 } 4632 4633 /* set use the current segments */ 4634 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { 4635 struct curseg_info *curseg_t = CURSEG_I(sbi, type); 4636 4637 __set_test_and_inuse(sbi, curseg_t->segno); 4638 } 4639 } 4640 4641 static void init_dirty_segmap(struct f2fs_sb_info *sbi) 4642 { 4643 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 4644 struct free_segmap_info *free_i = FREE_I(sbi); 4645 unsigned int segno = 0, offset = 0, secno; 4646 block_t valid_blocks, usable_blks_in_seg; 4647 block_t blks_per_sec = BLKS_PER_SEC(sbi); 4648 4649 while (1) { 4650 /* find dirty segment based on free segmap */ 4651 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); 4652 if (segno >= MAIN_SEGS(sbi)) 4653 break; 4654 offset = segno + 1; 4655 valid_blocks 
= get_valid_blocks(sbi, segno, false); 4656 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno); 4657 if (valid_blocks == usable_blks_in_seg || !valid_blocks) 4658 continue; 4659 if (valid_blocks > usable_blks_in_seg) { 4660 f2fs_bug_on(sbi, 1); 4661 continue; 4662 } 4663 mutex_lock(&dirty_i->seglist_lock); 4664 __locate_dirty_segment(sbi, segno, DIRTY); 4665 mutex_unlock(&dirty_i->seglist_lock); 4666 } 4667 4668 if (!__is_large_section(sbi)) 4669 return; 4670 4671 mutex_lock(&dirty_i->seglist_lock); 4672 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 4673 valid_blocks = get_valid_blocks(sbi, segno, true); 4674 secno = GET_SEC_FROM_SEG(sbi, segno); 4675 4676 if (!valid_blocks || valid_blocks == blks_per_sec) 4677 continue; 4678 if (IS_CURSEC(sbi, secno)) 4679 continue; 4680 set_bit(secno, dirty_i->dirty_secmap); 4681 } 4682 mutex_unlock(&dirty_i->seglist_lock); 4683 } 4684 4685 static int init_victim_secmap(struct f2fs_sb_info *sbi) 4686 { 4687 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 4688 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4689 4690 dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); 4691 if (!dirty_i->victim_secmap) 4692 return -ENOMEM; 4693 return 0; 4694 } 4695 4696 static int build_dirty_segmap(struct f2fs_sb_info *sbi) 4697 { 4698 struct dirty_seglist_info *dirty_i; 4699 unsigned int bitmap_size, i; 4700 4701 /* allocate memory for dirty segments list information */ 4702 dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info), 4703 GFP_KERNEL); 4704 if (!dirty_i) 4705 return -ENOMEM; 4706 4707 SM_I(sbi)->dirty_info = dirty_i; 4708 mutex_init(&dirty_i->seglist_lock); 4709 4710 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 4711 4712 for (i = 0; i < NR_DIRTY_TYPE; i++) { 4713 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size, 4714 GFP_KERNEL); 4715 if (!dirty_i->dirty_segmap[i]) 4716 return -ENOMEM; 4717 } 4718 4719 if (__is_large_section(sbi)) { 4720 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 4721 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi, 4722 bitmap_size, GFP_KERNEL); 4723 if (!dirty_i->dirty_secmap) 4724 return -ENOMEM; 4725 } 4726 4727 init_dirty_segmap(sbi); 4728 return init_victim_secmap(sbi); 4729 } 4730 4731 static int sanity_check_curseg(struct f2fs_sb_info *sbi) 4732 { 4733 int i; 4734 4735 /* 4736 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr; 4737 * In LFS curseg, all blkaddr after .next_blkoff should be unused. 
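 * A set bit at .next_blkoff, or (for an LFS log) at any later offset in
 * the segment, means the checkpointed next_blkoff and the on-disk
 * valid-block bitmap disagree; that case is reported as -EFSCORRUPTED
 * below.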
4738 */ 4739 for (i = 0; i < NR_PERSISTENT_LOG; i++) { 4740 struct curseg_info *curseg = CURSEG_I(sbi, i); 4741 struct seg_entry *se = get_seg_entry(sbi, curseg->segno); 4742 unsigned int blkofs = curseg->next_blkoff; 4743 4744 if (f2fs_sb_has_readonly(sbi) && 4745 i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE) 4746 continue; 4747 4748 sanity_check_seg_type(sbi, curseg->seg_type); 4749 4750 if (f2fs_test_bit(blkofs, se->cur_valid_map)) 4751 goto out; 4752 4753 if (curseg->alloc_type == SSR) 4754 continue; 4755 4756 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) { 4757 if (!f2fs_test_bit(blkofs, se->cur_valid_map)) 4758 continue; 4759 out: 4760 f2fs_err(sbi, 4761 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", 4762 i, curseg->segno, curseg->alloc_type, 4763 curseg->next_blkoff, blkofs); 4764 return -EFSCORRUPTED; 4765 } 4766 } 4767 return 0; 4768 } 4769 4770 #ifdef CONFIG_BLK_DEV_ZONED 4771 4772 static int check_zone_write_pointer(struct f2fs_sb_info *sbi, 4773 struct f2fs_dev_info *fdev, 4774 struct blk_zone *zone) 4775 { 4776 unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno; 4777 block_t zone_block, wp_block, last_valid_block; 4778 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; 4779 int i, s, b, ret; 4780 struct seg_entry *se; 4781 4782 if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) 4783 return 0; 4784 4785 wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block); 4786 wp_segno = GET_SEGNO(sbi, wp_block); 4787 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); 4788 zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block); 4789 zone_segno = GET_SEGNO(sbi, zone_block); 4790 zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno); 4791 4792 if (zone_segno >= MAIN_SEGS(sbi)) 4793 return 0; 4794 4795 /* 4796 * Skip checking the zones that cursegs point to, since 4797 * fix_curseg_write_pointer() checks them. 4798 */ 4799 for (i = 0; i < NO_CHECK_TYPE; i++) 4800 if (zone_secno == GET_SEC_FROM_SEG(sbi, 4801 CURSEG_I(sbi, i)->segno)) 4802 return 0; 4803 4804 /* 4805 * Get the last valid block of the zone. 4806 */ 4807 last_valid_block = zone_block - 1; 4808 for (s = sbi->segs_per_sec - 1; s >= 0; s--) { 4809 segno = zone_segno + s; 4810 se = get_seg_entry(sbi, segno); 4811 for (b = sbi->blocks_per_seg - 1; b >= 0; b--) 4812 if (f2fs_test_bit(b, se->cur_valid_map)) { 4813 last_valid_block = START_BLOCK(sbi, segno) + b; 4814 break; 4815 } 4816 if (last_valid_block >= zone_block) 4817 break; 4818 } 4819 4820 /* 4821 * If the last valid block is beyond the write pointer, report the 4822 * inconsistency. This inconsistency does not cause a write error 4823 * because the zone will not be selected for write operations until 4824 * it gets discarded. Just report it. 4825 */ 4826 if (last_valid_block >= wp_block) { 4827 f2fs_notice(sbi, "Valid block beyond write pointer: " 4828 "valid block[0x%x,0x%x] wp[0x%x,0x%x]", 4829 GET_SEGNO(sbi, last_valid_block), 4830 GET_BLKOFF_FROM_SEG0(sbi, last_valid_block), 4831 wp_segno, wp_blkoff); 4832 return 0; 4833 } 4834 4835 /* 4836 * If there is no valid block in the zone and the write pointer is 4837 * not at the zone start, reset the write pointer. 4838 */ 4839 if (last_valid_block + 1 == zone_block && zone->wp != zone->start) { 4840 f2fs_notice(sbi, 4841 "Zone without valid block has non-zero write " 4842 "pointer. 
Reset the write pointer: wp[0x%x,0x%x]", 4843 wp_segno, wp_blkoff); 4844 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, 4845 zone->len >> log_sectors_per_block); 4846 if (ret) { 4847 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", 4848 fdev->path, ret); 4849 return ret; 4850 } 4851 } 4852 4853 return 0; 4854 } 4855 4856 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi, 4857 block_t zone_blkaddr) 4858 { 4859 int i; 4860 4861 for (i = 0; i < sbi->s_ndevs; i++) { 4862 if (!bdev_is_zoned(FDEV(i).bdev)) 4863 continue; 4864 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr && 4865 zone_blkaddr <= FDEV(i).end_blk)) 4866 return &FDEV(i); 4867 } 4868 4869 return NULL; 4870 } 4871 4872 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, 4873 void *data) 4874 { 4875 memcpy(data, zone, sizeof(struct blk_zone)); 4876 return 0; 4877 } 4878 4879 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) 4880 { 4881 struct curseg_info *cs = CURSEG_I(sbi, type); 4882 struct f2fs_dev_info *zbd; 4883 struct blk_zone zone; 4884 unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; 4885 block_t cs_zone_block, wp_block; 4886 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; 4887 sector_t zone_sector; 4888 int err; 4889 4890 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); 4891 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); 4892 4893 zbd = get_target_zoned_dev(sbi, cs_zone_block); 4894 if (!zbd) 4895 return 0; 4896 4897 /* report zone for the sector the curseg points to */ 4898 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) 4899 << log_sectors_per_block; 4900 err = blkdev_report_zones(zbd->bdev, zone_sector, 1, 4901 report_one_zone_cb, &zone); 4902 if (err != 1) { 4903 f2fs_err(sbi, "Report zone failed: %s errno=(%d)", 4904 zbd->path, err); 4905 return err; 4906 } 4907 4908 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) 4909 return 0; 4910 4911 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); 4912 wp_segno = GET_SEGNO(sbi, wp_block); 4913 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); 4914 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); 4915 4916 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && 4917 wp_sector_off == 0) 4918 return 0; 4919 4920 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: " 4921 "curseg[0x%x,0x%x] wp[0x%x,0x%x]", 4922 type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); 4923 4924 f2fs_notice(sbi, "Assign new section to curseg[%d]: " 4925 "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); 4926 4927 f2fs_allocate_new_section(sbi, type, true); 4928 4929 /* check consistency of the zone curseg pointed to */ 4930 if (check_zone_write_pointer(sbi, zbd, &zone)) 4931 return -EIO; 4932 4933 /* check newly assigned zone */ 4934 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); 4935 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); 4936 4937 zbd = get_target_zoned_dev(sbi, cs_zone_block); 4938 if (!zbd) 4939 return 0; 4940 4941 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) 4942 << log_sectors_per_block; 4943 err = blkdev_report_zones(zbd->bdev, zone_sector, 1, 4944 report_one_zone_cb, &zone); 4945 if (err != 1) { 4946 f2fs_err(sbi, "Report zone failed: %s errno=(%d)", 4947 zbd->path, err); 4948 return err; 4949 } 4950 4951 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) 4952 return 0; 4953 4954 if (zone.wp != zone.start) { 4955 f2fs_notice(sbi, 4956 "New zone for curseg[%d] 
is not yet discarded. " 4957 "Reset the zone: curseg[0x%x,0x%x]", 4958 type, cs->segno, cs->next_blkoff); 4959 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, 4960 zone_sector >> log_sectors_per_block, 4961 zone.len >> log_sectors_per_block); 4962 if (err) { 4963 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", 4964 zbd->path, err); 4965 return err; 4966 } 4967 } 4968 4969 return 0; 4970 } 4971 4972 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) 4973 { 4974 int i, ret; 4975 4976 for (i = 0; i < NR_PERSISTENT_LOG; i++) { 4977 ret = fix_curseg_write_pointer(sbi, i); 4978 if (ret) 4979 return ret; 4980 } 4981 4982 return 0; 4983 } 4984 4985 struct check_zone_write_pointer_args { 4986 struct f2fs_sb_info *sbi; 4987 struct f2fs_dev_info *fdev; 4988 }; 4989 4990 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, 4991 void *data) 4992 { 4993 struct check_zone_write_pointer_args *args; 4994 4995 args = (struct check_zone_write_pointer_args *)data; 4996 4997 return check_zone_write_pointer(args->sbi, args->fdev, zone); 4998 } 4999 5000 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) 5001 { 5002 int i, ret; 5003 struct check_zone_write_pointer_args args; 5004 5005 for (i = 0; i < sbi->s_ndevs; i++) { 5006 if (!bdev_is_zoned(FDEV(i).bdev)) 5007 continue; 5008 5009 args.sbi = sbi; 5010 args.fdev = &FDEV(i); 5011 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES, 5012 check_zone_write_pointer_cb, &args); 5013 if (ret < 0) 5014 return ret; 5015 } 5016 5017 return 0; 5018 } 5019 5020 static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx, 5021 unsigned int dev_idx) 5022 { 5023 if (!bdev_is_zoned(FDEV(dev_idx).bdev)) 5024 return true; 5025 return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq); 5026 } 5027 5028 /* Return the zone index in the given device */ 5029 static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno, 5030 int dev_idx) 5031 { 5032 block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); 5033 5034 return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >> 5035 sbi->log_blocks_per_blkz; 5036 } 5037 5038 /* 5039 * Return the usable segments in a section based on the zone's 5040 * corresponding zone capacity. Zone is equal to a section. 5041 */ 5042 static inline unsigned int f2fs_usable_zone_segs_in_sec( 5043 struct f2fs_sb_info *sbi, unsigned int segno) 5044 { 5045 unsigned int dev_idx, zone_idx, unusable_segs_in_sec; 5046 5047 dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno)); 5048 zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx); 5049 5050 /* Conventional zone's capacity is always equal to zone size */ 5051 if (is_conv_zone(sbi, zone_idx, dev_idx)) 5052 return sbi->segs_per_sec; 5053 5054 /* 5055 * If the zone_capacity_blocks array is NULL, then zone capacity 5056 * is equal to the zone size for all zones 5057 */ 5058 if (!FDEV(dev_idx).zone_capacity_blocks) 5059 return sbi->segs_per_sec; 5060 5061 /* Get the segment count beyond zone capacity block */ 5062 unusable_segs_in_sec = (sbi->blocks_per_blkz - 5063 FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >> 5064 sbi->log_blocks_per_seg; 5065 return sbi->segs_per_sec - unusable_segs_in_sec; 5066 } 5067 5068 /* 5069 * Return the number of usable blocks in a segment. The number of blocks 5070 * returned is always equal to the number of blocks in a segment for 5071 * segments fully contained within a sequential zone capacity or a 5072 * conventional zone. 
For segments partially contained in a sequential 5073 * zone capacity, the number of usable blocks up to the zone capacity 5074 * is returned. 0 is returned in all other cases. 5075 */ 5076 static inline unsigned int f2fs_usable_zone_blks_in_seg( 5077 struct f2fs_sb_info *sbi, unsigned int segno) 5078 { 5079 block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr; 5080 unsigned int zone_idx, dev_idx, secno; 5081 5082 secno = GET_SEC_FROM_SEG(sbi, segno); 5083 seg_start = START_BLOCK(sbi, segno); 5084 dev_idx = f2fs_target_device_index(sbi, seg_start); 5085 zone_idx = get_zone_idx(sbi, secno, dev_idx); 5086 5087 /* 5088 * Conventional zone's capacity is always equal to zone size, 5089 * so, blocks per segment is unchanged. 5090 */ 5091 if (is_conv_zone(sbi, zone_idx, dev_idx)) 5092 return sbi->blocks_per_seg; 5093 5094 if (!FDEV(dev_idx).zone_capacity_blocks) 5095 return sbi->blocks_per_seg; 5096 5097 sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); 5098 sec_cap_blkaddr = sec_start_blkaddr + 5099 FDEV(dev_idx).zone_capacity_blocks[zone_idx]; 5100 5101 /* 5102 * If segment starts before zone capacity and spans beyond 5103 * zone capacity, then usable blocks are from seg start to 5104 * zone capacity. If the segment starts after the zone capacity, 5105 * then there are no usable blocks. 5106 */ 5107 if (seg_start >= sec_cap_blkaddr) 5108 return 0; 5109 if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr) 5110 return sec_cap_blkaddr - seg_start; 5111 5112 return sbi->blocks_per_seg; 5113 } 5114 #else 5115 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) 5116 { 5117 return 0; 5118 } 5119 5120 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) 5121 { 5122 return 0; 5123 } 5124 5125 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi, 5126 unsigned int segno) 5127 { 5128 return 0; 5129 } 5130 5131 static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi, 5132 unsigned int segno) 5133 { 5134 return 0; 5135 } 5136 #endif 5137 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, 5138 unsigned int segno) 5139 { 5140 if (f2fs_sb_has_blkzoned(sbi)) 5141 return f2fs_usable_zone_blks_in_seg(sbi, segno); 5142 5143 return sbi->blocks_per_seg; 5144 } 5145 5146 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, 5147 unsigned int segno) 5148 { 5149 if (f2fs_sb_has_blkzoned(sbi)) 5150 return f2fs_usable_zone_segs_in_sec(sbi, segno); 5151 5152 return sbi->segs_per_sec; 5153 } 5154 5155 /* 5156 * Update min, max modified time for cost-benefit GC algorithm 5157 */ 5158 static void init_min_max_mtime(struct f2fs_sb_info *sbi) 5159 { 5160 struct sit_info *sit_i = SIT_I(sbi); 5161 unsigned int segno; 5162 5163 down_write(&sit_i->sentry_lock); 5164 5165 sit_i->min_mtime = ULLONG_MAX; 5166 5167 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 5168 unsigned int i; 5169 unsigned long long mtime = 0; 5170 5171 for (i = 0; i < sbi->segs_per_sec; i++) 5172 mtime += get_seg_entry(sbi, segno + i)->mtime; 5173 5174 mtime = div_u64(mtime, sbi->segs_per_sec); 5175 5176 if (sit_i->min_mtime > mtime) 5177 sit_i->min_mtime = mtime; 5178 } 5179 sit_i->max_mtime = get_mtime(sbi, false); 5180 sit_i->dirty_max_mtime = 0; 5181 up_write(&sit_i->sentry_lock); 5182 } 5183 5184 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) 5185 { 5186 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 5187 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 5188 struct f2fs_sm_info *sm_info; 5189 int err; 5190 5191 
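	/*
	 * Build order below matters: SIT info, the free segmap and the
	 * current segments must exist before SIT entries are loaded from
	 * disk, and the dirty segmap plus the curseg sanity check come
	 * last.
	 */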
sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL); 5192 if (!sm_info) 5193 return -ENOMEM; 5194 5195 /* init sm info */ 5196 sbi->sm_info = sm_info; 5197 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 5198 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); 5199 sm_info->segment_count = le32_to_cpu(raw_super->segment_count); 5200 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); 5201 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 5202 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 5203 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 5204 sm_info->rec_prefree_segments = sm_info->main_segments * 5205 DEF_RECLAIM_PREFREE_SEGMENTS / 100; 5206 if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) 5207 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; 5208 5209 if (!f2fs_lfs_mode(sbi)) 5210 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; 5211 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 5212 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 5213 sm_info->min_seq_blocks = sbi->blocks_per_seg; 5214 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; 5215 sm_info->min_ssr_sections = reserved_sections(sbi); 5216 5217 INIT_LIST_HEAD(&sm_info->sit_entry_set); 5218 5219 init_rwsem(&sm_info->curseg_lock); 5220 5221 if (!f2fs_readonly(sbi->sb)) { 5222 err = f2fs_create_flush_cmd_control(sbi); 5223 if (err) 5224 return err; 5225 } 5226 5227 err = create_discard_cmd_control(sbi); 5228 if (err) 5229 return err; 5230 5231 err = build_sit_info(sbi); 5232 if (err) 5233 return err; 5234 err = build_free_segmap(sbi); 5235 if (err) 5236 return err; 5237 err = build_curseg(sbi); 5238 if (err) 5239 return err; 5240 5241 /* reinit free segmap based on SIT */ 5242 err = build_sit_entries(sbi); 5243 if (err) 5244 return err; 5245 5246 init_free_segmap(sbi); 5247 err = build_dirty_segmap(sbi); 5248 if (err) 5249 return err; 5250 5251 err = sanity_check_curseg(sbi); 5252 if (err) 5253 return err; 5254 5255 init_min_max_mtime(sbi); 5256 return 0; 5257 } 5258 5259 static void discard_dirty_segmap(struct f2fs_sb_info *sbi, 5260 enum dirty_type dirty_type) 5261 { 5262 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5263 5264 mutex_lock(&dirty_i->seglist_lock); 5265 kvfree(dirty_i->dirty_segmap[dirty_type]); 5266 dirty_i->nr_dirty[dirty_type] = 0; 5267 mutex_unlock(&dirty_i->seglist_lock); 5268 } 5269 5270 static void destroy_victim_secmap(struct f2fs_sb_info *sbi) 5271 { 5272 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5273 5274 kvfree(dirty_i->victim_secmap); 5275 } 5276 5277 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) 5278 { 5279 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 5280 int i; 5281 5282 if (!dirty_i) 5283 return; 5284 5285 /* discard pre-free/dirty segments list */ 5286 for (i = 0; i < NR_DIRTY_TYPE; i++) 5287 discard_dirty_segmap(sbi, i); 5288 5289 if (__is_large_section(sbi)) { 5290 mutex_lock(&dirty_i->seglist_lock); 5291 kvfree(dirty_i->dirty_secmap); 5292 mutex_unlock(&dirty_i->seglist_lock); 5293 } 5294 5295 destroy_victim_secmap(sbi); 5296 SM_I(sbi)->dirty_info = NULL; 5297 kfree(dirty_i); 5298 } 5299 5300 static void destroy_curseg(struct f2fs_sb_info *sbi) 5301 { 5302 struct curseg_info *array = SM_I(sbi)->curseg_array; 5303 int i; 5304 5305 if (!array) 5306 return; 5307 SM_I(sbi)->curseg_array = NULL; 5308 for (i = 0; i < NR_CURSEG_TYPE; i++) { 5309 kfree(array[i].sum_blk); 5310 kfree(array[i].journal); 5311 } 5312 kfree(array); 5313 } 
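/*
 * The destroy_* helpers above and below tear down segment-manager state in
 * roughly the reverse order of f2fs_build_segment_manager(); see
 * f2fs_destroy_segment_manager() for the exact sequence.
 */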
5314 5315 static void destroy_free_segmap(struct f2fs_sb_info *sbi) 5316 { 5317 struct free_segmap_info *free_i = SM_I(sbi)->free_info; 5318 5319 if (!free_i) 5320 return; 5321 SM_I(sbi)->free_info = NULL; 5322 kvfree(free_i->free_segmap); 5323 kvfree(free_i->free_secmap); 5324 kfree(free_i); 5325 } 5326 5327 static void destroy_sit_info(struct f2fs_sb_info *sbi) 5328 { 5329 struct sit_info *sit_i = SIT_I(sbi); 5330 5331 if (!sit_i) 5332 return; 5333 5334 if (sit_i->sentries) 5335 kvfree(sit_i->bitmap); 5336 kfree(sit_i->tmp_map); 5337 5338 kvfree(sit_i->sentries); 5339 kvfree(sit_i->sec_entries); 5340 kvfree(sit_i->dirty_sentries_bitmap); 5341 5342 SM_I(sbi)->sit_info = NULL; 5343 kvfree(sit_i->sit_bitmap); 5344 #ifdef CONFIG_F2FS_CHECK_FS 5345 kvfree(sit_i->sit_bitmap_mir); 5346 kvfree(sit_i->invalid_segmap); 5347 #endif 5348 kfree(sit_i); 5349 } 5350 5351 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) 5352 { 5353 struct f2fs_sm_info *sm_info = SM_I(sbi); 5354 5355 if (!sm_info) 5356 return; 5357 f2fs_destroy_flush_cmd_control(sbi, true); 5358 destroy_discard_cmd_control(sbi); 5359 destroy_dirty_segmap(sbi); 5360 destroy_curseg(sbi); 5361 destroy_free_segmap(sbi); 5362 destroy_sit_info(sbi); 5363 sbi->sm_info = NULL; 5364 kfree(sm_info); 5365 } 5366 5367 int __init f2fs_create_segment_manager_caches(void) 5368 { 5369 discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry", 5370 sizeof(struct discard_entry)); 5371 if (!discard_entry_slab) 5372 goto fail; 5373 5374 discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd", 5375 sizeof(struct discard_cmd)); 5376 if (!discard_cmd_slab) 5377 goto destroy_discard_entry; 5378 5379 sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set", 5380 sizeof(struct sit_entry_set)); 5381 if (!sit_entry_set_slab) 5382 goto destroy_discard_cmd; 5383 5384 inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry", 5385 sizeof(struct inmem_pages)); 5386 if (!inmem_entry_slab) 5387 goto destroy_sit_entry_set; 5388 return 0; 5389 5390 destroy_sit_entry_set: 5391 kmem_cache_destroy(sit_entry_set_slab); 5392 destroy_discard_cmd: 5393 kmem_cache_destroy(discard_cmd_slab); 5394 destroy_discard_entry: 5395 kmem_cache_destroy(discard_entry_slab); 5396 fail: 5397 return -ENOMEM; 5398 } 5399 5400 void f2fs_destroy_segment_manager_caches(void) 5401 { 5402 kmem_cache_destroy(sit_entry_set_slab); 5403 kmem_cache_destroy(discard_cmd_slab); 5404 kmem_cache_destroy(discard_entry_slab); 5405 kmem_cache_destroy(inmem_entry_slab); 5406 } 5407
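/*
 * Illustrative pairing (not part of this file): the filesystem's module
 * init path is expected to call f2fs_create_segment_manager_caches() once
 * and to unwind with f2fs_destroy_segment_manager_caches() on failure or
 * module exit, e.g.:
 *
 *	err = f2fs_create_segment_manager_caches();
 *	if (err)
 *		goto fail;
 *	...
 *	f2fs_destroy_segment_manager_caches();
 *
 * so the four slab caches created above always have matching destroys.
 */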