/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

void register_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *new;

	f2fs_trace_pid(page);

	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
	SetPagePrivate(page);

	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
	mutex_lock(&fi->inmem_lock);
	get_page(page);
	list_add_tail(&new->list, &fi->inmem_pages);
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	mutex_unlock(&fi->inmem_lock);

	trace_f2fs_register_inmem_page(page, INMEM);
}

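/*
 * Walk @head and release the in-memory pages attached to an atomic file.
 * With @drop, the pages are simply discarded; with @recover, each page's
 * on-disk block is switched back to the old address recorded at commit
 * time, undoing a partially failed commit.
 */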
static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inmem_pages *cur, *tmp;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, head, list) {
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		lock_page(page);

		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);

			set_new_dnode(&dn, inode, NULL, NULL, 0);
			if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) {
				err = -EAGAIN;
				goto next;
			}
			get_node_info(sbi, dn.nid, &ni);
			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
		/* we don't need to invalidate this in the successful status */
		if (drop || recover)
			ClearPageUptodate(page);
		set_page_private(page, 0);
		ClearPagePrivate(page);
		f2fs_put_page(page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	return err;
}

void drop_inmem_pages(struct inode *inode)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	mutex_lock(&fi->inmem_lock);
	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_FILE);
	stat_dec_atomic_write(inode);
}

void drop_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct list_head *head = &fi->inmem_pages;
	struct inmem_pages *cur = NULL;

	f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));

	mutex_lock(&fi->inmem_lock);
	list_for_each_entry(cur, head, list) {
		if (cur->page == page)
			break;
	}

	f2fs_bug_on(sbi, !cur || cur->page != page);
	list_del(&cur->list);
	mutex_unlock(&fi->inmem_lock);

	dec_page_count(sbi, F2FS_INMEM_PAGES);
	kmem_cache_free(inmem_entry_slab, cur);

	ClearPageUptodate(page);
	set_page_private(page, 0);
	ClearPagePrivate(page);
	f2fs_put_page(page, 0);

	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

static int __commit_inmem_pages(struct inode *inode,
					struct list_head *revoke_list)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_PRIO,
	};
	pgoff_t last_idx = ULONG_MAX;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		struct page *page = cur->page;

		lock_page(page);
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			set_page_dirty(page);
			f2fs_wait_on_page_writeback(page, DATA, true);
			if (clear_page_dirty_for_io(page)) {
				inode_dec_dirty_pages(inode);
				remove_dirty_inode(inode);
			}

			fio.page = page;
			fio.old_blkaddr = NULL_ADDR;
			fio.encrypted_page = NULL;
			fio.need_lock = false;
			err = do_write_data_page(&fio);
			if (err) {
				unlock_page(page);
				break;
			}

			/* record old blkaddr for revoking */
			cur->old_addr = fio.old_blkaddr;
			last_idx = page->index;
		}
		unlock_page(page);
		list_move_tail(&cur->list, revoke_list);
	}

	if (last_idx != ULONG_MAX)
		f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
							DATA, WRITE);

	if (!err)
		__revoke_inmem_pages(inode, revoke_list, false, false);

	return err;
}

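/*
 * Entry point of an atomic commit: write back every in-memory page of the
 * atomic file under lock_op, and revoke the already written pages if any
 * write fails, so the commit is applied all-or-nothing as far as possible.
 */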
int commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct list_head revoke_list;
	int err;

	INIT_LIST_HEAD(&revoke_list);
	f2fs_balance_fs(sbi, true);
	f2fs_lock_op(sbi);

	set_inode_flag(inode, FI_ATOMIC_COMMIT);

	mutex_lock(&fi->inmem_lock);
	err = __commit_inmem_pages(inode, &revoke_list);
	if (err) {
		int ret;
		/*
		 * Try to revoke all committed pages. This could still fail
		 * due to lack of memory or other reasons; in that case EAGAIN
		 * is returned, which means the transaction integrity is
		 * already broken and the caller should use its journal to do
		 * the recovery or rewrite & commit the last transaction. For
		 * other error numbers, revoking was done by the filesystem
		 * itself.
		 */
		ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
		if (ret)
			err = ret;

		/* drop all uncommitted pages */
		__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	}
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

	f2fs_unlock_op(sbi);
	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
		f2fs_show_injection_info(FAULT_CHECKPOINT);
		f2fs_stop_checkpoint(sbi, false);
	}
#endif

	/* balance_fs_bg is able to be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi);

	/*
	 * We should do GC or end up with a checkpoint if there are too many
	 * dirty dir/node pages without enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0, 0)) {
		mutex_lock(&sbi->gc_mutex);
		f2fs_gc(sbi, false, false, NULL_SEGNO);
	}
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
	/* try to shrink extent cache when there is not enough memory */
	if (!available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!available_free_memory(sbi, NAT_ENTRIES))
		try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!available_free_memory(sbi, FREE_NIDS))
		try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		build_free_nids(sbi, false, false);

	if (!is_idle(sbi) && !excess_dirty_nats(sbi))
		return;

	/* checkpoint is the only way to shrink partial cached entries */
	if (!available_free_memory(sbi, NAT_ENTRIES) ||
			!available_free_memory(sbi, INO_ENTRIES) ||
			excess_prefree_segs(sbi) ||
			excess_dirty_nats(sbi) ||
			f2fs_time_over(sbi, CP_TIME)) {
		if (test_opt(sbi, DATA_FLUSH)) {
			struct blk_plug plug;

			blk_start_plug(&plug);
			sync_dirty_inodes(sbi, FILE_INODE);
			blk_finish_plug(&plug);
		}
		f2fs_sync_fs(sbi->sb, true);
		stat_inc_bg_cp_count(sbi->stat_info);
	}
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	struct bio *bio = f2fs_bio_alloc(0);
	int ret;

	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
	bio->bi_bdev = bdev;
	ret = submit_bio_wait(bio);
	bio_put(bio);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi)
{
	int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev);
	int i;

	if (!sbi->s_ndevs || ret)
		return ret;

	for (i = 1; i < sbi->s_ndevs; i++) {
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

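/*
 * Flush-merge daemon: batch the flush requests queued on fcc->issue_list,
 * issue a single preflush to every device, and then complete all waiters
 * with the shared result.
 */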
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		ret = submit_flush_wait(sbi);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		ret = submit_flush_wait(sbi);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	if (!atomic_read(&fcc->issing_flush)) {
		atomic_inc(&fcc->issing_flush);
		ret = submit_flush_wait(sbi);
		atomic_dec(&fcc->issing_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	init_completion(&cmd.wait);

	atomic_inc(&fcc->issing_flush);
	llist_add(&cmd.llnode, &fcc->issue_list);

	if (!fcc->dispatch_list)
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->issing_flush);
	} else {
		llist_del_all(&fcc->issue_list);
		atomic_set(&fcc->issing_flush, 0);
	}

	return cmd.ret;
}

int create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		goto init_thread;
	}

	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->issing_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
		return err;
	}

	return err;
}

void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

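/*
 * Dirty segment tracking: __locate_dirty_segment and __remove_dirty_segment
 * keep the per-type dirty_segmap bitmaps and nr_dirty counters in sync, so
 * the cleaner and the SSR allocator can find partially valid segments.
 */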
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		if (get_valid_blocks(sbi, segno, true) == 0)
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
	}
}

/*
 * Errors such as -ENOMEM should not occur here.
 * Adding a dirty entry into the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);

	if (valid_blocks == 0) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->lstart = lstart;
	dc->start = start;
	dc->len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}

static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node *parent, struct rb_node **p)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color(&dc->rb_node, &dcc->root);

	return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_dec(&dcc->issing_discard);

	list_del(&dc->list);
	rb_erase(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		f2fs_msg(sbi->sb, KERN_INFO,
			"Issue discard failed, ret: %d", dc->error);
	__detach_discard_cmd(dcc, dc);
}

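/*
 * A discard command moves through three states: it is created as D_PREP on a
 * pending list, becomes D_SUBMIT once its bio has been issued, and the bio
 * completion below marks it D_DONE and wakes up anybody waiting on dc->wait.
 */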
static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;

	dc->error = bio->bi_error;
	dc->state = D_DONE;
	complete(&dc->wait);
	bio_put(bio);
}

/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct bio *bio = NULL;

	if (dc->state != D_PREP)
		return;

	trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);

	dc->error = __blkdev_issue_discard(dc->bdev,
				SECTOR_FROM_BLOCK(dc->start),
				SECTOR_FROM_BLOCK(dc->len),
				GFP_NOFS, 0, &bio);
	if (!dc->error) {
		/* should keep before submission to avoid D_DONE right away */
		dc->state = D_SUBMIT;
		atomic_inc(&dcc->issued_discard);
		atomic_inc(&dcc->issing_discard);
		if (bio) {
			bio->bi_private = dc;
			bio->bi_end_io = f2fs_submit_discard_endio;
			bio->bi_opf |= REQ_SYNC;
			submit_bio(bio);
			list_move_tail(&dc->list, &dcc->wait_list);
		}
	} else {
		__remove_discard_cmd(sbi, dc);
	}
}

static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p = &dcc->root.rb_node;
	struct rb_node *parent = NULL;
	struct discard_cmd *dc = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
do_insert:
	dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
	if (!dc)
		return NULL;

	return dc;
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool modified = false;

	if (dc->state == D_DONE || dc->len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	dcc->undiscard_blks -= di.len;

	if (blkaddr > di.lstart) {
		dc->len = blkaddr - dc->lstart;
		dcc->undiscard_blks += dc->len;
		__relocate_discard_cmd(dcc, dc);
		f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
			f2fs_bug_on(sbi,
				!__check_rb_tree_consistence(sbi, &dcc->root));
		} else {
			dc->lstart++;
			dc->len--;
			dc->start++;
			dcc->undiscard_blks += dc->len;
			__relocate_discard_cmd(dcc, dc);
			f2fs_bug_on(sbi,
				!__check_rb_tree_consistence(sbi, &dcc->root));
		}
	}
}

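/*
 * Insert a new discard range into the rb-tree of pending commands, merging it
 * with an adjacent D_PREP command on the same device (back merge with the
 * previous one, front merge with the next one) whenever the ranges touch.
 */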
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	block_t end = lstart + len;

	mutex_lock(&dcc->cmd_lock);

	dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true);
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			f2fs_bug_on(sbi,
				!__check_rb_tree_consistence(sbi, &dcc->root));
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			f2fs_bug_on(sbi,
				!__check_rb_tree_consistence(sbi, &dcc->root));
			merged = true;
		}

		if (!merged) {
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
			f2fs_bug_on(sbi,
				!__check_rb_tree_consistence(sbi, &dcc->root));
		}
next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	mutex_unlock(&dcc->cmd_lock);
}

static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

	trace_f2fs_queue_discard(bdev, blkstart, blklen);

	if (sbi->s_ndevs) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
	return 0;
}

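/*
 * Issue the pending discard commands, starting from the pending list that
 * holds the largest ranges. When @issue_cond is set, commands are only
 * submitted while the filesystem looks idle, and the scan is capped at about
 * DISCARD_ISSUE_RATE entries per pass.
 */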
static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
	int i, iter = 0;

	mutex_lock(&dcc->cmd_lock);
	blk_start_plug(&plug);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

			if (!issue_cond || is_idle(sbi))
				__submit_discard_cmd(sbi, dc);
			if (issue_cond && iter++ > DISCARD_ISSUE_RATE)
				goto out;
		}
	}
out:
	blk_finish_plug(&plug);
	mutex_unlock(&dcc->cmd_lock);
}

static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = &(dcc->wait_list);
	struct discard_cmd *dc, *tmp;

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(dc, tmp, wait_list, list) {
		if (!wait_cond || dc->state == D_DONE) {
			if (dc->ref)
				continue;
			wait_for_completion_io(&dc->wait);
			__remove_discard_cmd(sbi, dc);
		}
	}
	mutex_unlock(&dcc->cmd_lock);
}

/* This should be covered by global mutex, &sit_i->sentry_lock */
void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
	bool need_wait = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr);
	if (dc) {
		if (dc->state == D_PREP) {
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (need_wait) {
		wait_for_completion_io(&dc->wait);
		mutex_lock(&dcc->cmd_lock);
		f2fs_bug_on(sbi, dc->state != D_DONE);
		dc->ref--;
		if (!dc->ref)
			__remove_discard_cmd(sbi, dc);
		mutex_unlock(&dcc->cmd_lock);
	}
}

/* This comes from f2fs_put_super */
void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
{
	__issue_discard_cmd(sbi, false);
	__wait_discard_cmd(sbi, false);
}

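/*
 * Background discard daemon: whenever discard commands are queued, it issues
 * them while the filesystem is idle, waits for the submitted ones to finish,
 * and throttles itself with congestion_wait between passes.
 */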
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	__issue_discard_cmd(sbi, true);
	__wait_discard_cmd(sbi, true);

	congestion_wait(BLK_RW_SYNC, HZ/50);

	wait_event_interruptible(*q, kthread_should_stop() ||
				atomic_read(&dcc->discard_cmd_cnt));
	goto repeat;
}

#ifdef CONFIG_BLK_DEV_ZONED
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	sector_t sector, nr_sects;
	block_t lblkstart = blkstart;
	int devi = 0;

	if (sbi->s_ndevs) {
		devi = f2fs_target_device_index(sbi, blkstart);
		blkstart -= FDEV(devi).start_blk;
	}

	/*
	 * We need to know the type of the zone: for conventional zones,
	 * use regular discard if the drive supports it. For sequential
	 * zones, reset the zone write pointer.
	 */
	switch (get_blkz_type(sbi, bdev, blkstart)) {

	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!blk_queue_discard(bdev_get_queue(bdev)))
			return 0;
		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_msg(sbi->sb, KERN_INFO,
				"(%d) %s: Unaligned discard attempted (block %x + %x)",
				devi, sbi->s_ndevs ? FDEV(devi).path : "",
				blkstart, blklen);
			return -EIO;
		}
		trace_f2fs_issue_reset_zone(bdev, blkstart);
		return blkdev_reset_zones(bdev, sector,
					  nr_sects, GFP_NOFS);
	default:
		/* Unknown zone type: broken device ? */
		return -EIO;
	}
}
#endif

static int __issue_discard_async(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
}

static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
				block_t blkstart, block_t blklen)
{
	sector_t start = blkstart, len = 0;
	struct block_device *bdev;
	struct seg_entry *se;
	unsigned int offset;
	block_t i;
	int err = 0;

	bdev = f2fs_target_device(sbi, blkstart, NULL);

	for (i = blkstart; i < blkstart + blklen; i++, len++) {
		if (i != start) {
			struct block_device *bdev2 =
				f2fs_target_device(sbi, i, NULL);

			if (bdev2 != bdev) {
				err = __issue_discard_async(sbi, bdev,
						start, len);
				if (err)
					return err;
				bdev = bdev2;
				start = i;
				len = 0;
			}
		}

		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}

	if (len)
		err = __issue_discard_async(sbi, bdev, start, len);
	return err;
}

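/*
 * Collect discard candidates for one segment: blocks that were valid at the
 * last checkpoint but are invalid now, or, for FITRIM, blocks that are
 * invalid in the last checkpoint and not already covered by the discard map.
 * The candidates are recorded as a bitmap in a discard_entry for later
 * submission.
 */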
static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason & CP_DISCARD);
	struct discard_entry *de = NULL;
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
	int i;

	if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
		return false;

	if (!force) {
		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
			return false;
	}

	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		if (force && start && end != max_blocks
					&& (end - start) < cpc->trim_minlen)
			continue;

		if (check_only)
			return true;

		if (!de) {
			de = f2fs_kmem_cache_alloc(discard_entry_slab,
								GFP_F2FS_ZERO);
			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
			list_add_tail(&de->list, head);
		}

		for (i = start; i < end; i++)
			__set_bit_le(i, (void *)de->discard_map);

		SM_I(sbi)->dcc_info->nr_discards += end - start;
	}
	return false;
}

void release_discard_addrs(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
	struct discard_entry *entry, *this;

	/* drop caches */
	list_for_each_entry_safe(entry, this, head, list) {
		list_del(&entry->list);
		kmem_cache_free(discard_entry_slab, entry);
	}
}

/*
 * Should call clear_prefree_segments after checkpoint is done.
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
		__set_test_and_free(sbi, segno);
	mutex_unlock(&dirty_i->seglist_lock);
}

void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;
	unsigned int secno, start_segno;
	bool force = (cpc->reason & CP_DISCARD);

	mutex_lock(&dirty_i->seglist_lock);

	while (1) {
		int i;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		for (i = start; i < end; i++)
			clear_bit(i, prefree_map);

		dirty_i->nr_dirty[PRE] -= end - start;

		if (!test_opt(sbi, DISCARD))
			continue;

		if (force && start >= cpc->trim_start &&
					(end - 1) <= cpc->trim_end)
			continue;

		if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
			continue;
		}
next:
		secno = GET_SEC_FROM_SEG(sbi, start);
		start_segno = GET_SEG_FROM_SEC(sbi, secno);
		if (!IS_CURSEC(sbi, secno) &&
			!get_valid_blocks(sbi, start, true))
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
				sbi->segs_per_sec << sbi->log_blocks_per_seg);

		start = start_segno + sbi->segs_per_sec;
		if (start < end)
			goto next;
		else
			end = start - 1;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	/* send small discards */
	list_for_each_entry_safe(entry, this, head, list) {
		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
		bool is_valid = test_bit_le(0, entry->discard_map);

find_next:
		if (is_valid) {
			next_pos = find_next_zero_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
			len = next_pos - cur_pos;

			if (force && len < cpc->trim_minlen)
				goto skip;

			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
									len);
			cpc->trimmed += len;
			total_len += len;
		} else {
			next_pos = find_next_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
		}
skip:
		cur_pos = next_pos;
		is_valid = !is_valid;

		if (cur_pos < sbi->blocks_per_seg)
			goto find_next;

		list_del(&entry->list);
		SM_I(sbi)->dcc_info->nr_discards -= total_len;
		kmem_cache_free(discard_entry_slab, entry);
	}

	wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
}

static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct discard_cmd_control *dcc;
	int err = 0, i;

	if (SM_I(sbi)->dcc_info) {
		dcc = SM_I(sbi)->dcc_info;
		goto init_thread;
	}

	dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
	if (!dcc)
		return -ENOMEM;

	INIT_LIST_HEAD(&dcc->entry_list);
	for (i = 0; i < MAX_PLIST_NUM; i++)
		INIT_LIST_HEAD(&dcc->pend_list[i]);
	INIT_LIST_HEAD(&dcc->wait_list);
	mutex_init(&dcc->cmd_lock);
	atomic_set(&dcc->issued_discard, 0);
	atomic_set(&dcc->issing_discard, 0);
	atomic_set(&dcc->discard_cmd_cnt, 0);
	dcc->nr_discards = 0;
	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
	dcc->undiscard_blks = 0;
	dcc->root = RB_ROOT;

	init_waitqueue_head(&dcc->discard_wait_queue);
	SM_I(sbi)->dcc_info = dcc;
init_thread:
	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(dcc->f2fs_issue_discard)) {
		err = PTR_ERR(dcc->f2fs_issue_discard);
		kfree(dcc);
		SM_I(sbi)->dcc_info = NULL;
		return err;
	}

	return err;
}

static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (!dcc)
		return;

	if (dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
	}

	kfree(dcc);
	SM_I(sbi)->dcc_info = NULL;
}

static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);
	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

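/*
 * Account one block becoming valid (@del == 1) or invalid (@del == -1) in the
 * SIT entry of the segment that contains @blkaddr: adjust the valid block
 * count, the current/checkpoint valid bitmaps and the discard map, and mark
 * the entry dirty so it is written back at the next checkpoint.
 */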
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi);
	SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) {
#ifdef CONFIG_F2FS_CHECK_FS
			if (f2fs_test_and_set_bit(offset,
						se->cur_valid_map_mir))
				f2fs_bug_on(sbi, 1);
			else
				WARN_ON(1);
#else
			f2fs_bug_on(sbi, 1);
#endif
		}
		if (f2fs_discard_en(sbi) &&
			!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;

		/* don't overwrite by SSR to keep node chain */
		if (se->type == CURSEG_WARM_NODE) {
			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
				se->ckpt_valid_blocks++;
		}
	} else {
		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
#ifdef CONFIG_F2FS_CHECK_FS
			if (!f2fs_test_and_clear_bit(offset,
						se->cur_valid_map_mir))
				f2fs_bug_on(sbi, 1);
			else
				WARN_ON(1);
#else
			f2fs_bug_on(sbi, 1);
#endif
		}
		if (f2fs_discard_en(sbi) &&
			f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
	update_sit_entry(sbi, new, 1);
	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
		update_sit_entry(sbi, old, -1);

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}

void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	/* add it into sit main buffer */
	mutex_lock(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	mutex_unlock(&sit_i->sentry_lock);
}

bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno, offset;
	struct seg_entry *se;
	bool is_cp = false;

	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
		return true;

	mutex_lock(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	if (f2fs_test_bit(offset, se->ckpt_valid_map))
		is_cp = true;

	mutex_unlock(&sit_i->sentry_lock);

	return is_cp;
}

/*
 * This function should be called with curseg_mutex held.
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
					struct f2fs_summary *sum)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	void *addr = curseg->sum_blk;
	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
	memcpy(addr, sum, sizeof(struct f2fs_summary));
}

/*
 * Calculate the number of current summary pages for writing
 */
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] == SSR)
			valid_sum_count += sbi->blocks_per_seg;
		else {
			if (for_ra)
				valid_sum_count += le16_to_cpu(
					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
			else
				valid_sum_count += curseg_blkoff(sbi, i);
		}
	}

	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}

/*
 * Caller should put this summary page
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}

void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
{
	struct page *page = grab_meta_page(sbi, blk_addr);
	void *dst = page_address(page);

	if (src)
		memcpy(dst, src, PAGE_SIZE);
	else
		memset(dst, 0, PAGE_SIZE);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	update_meta_page(sbi, (void *)sum_blk, blk_addr);
}

static void write_current_sum_page(struct f2fs_sb_info *sbi,
						int type, block_t blk_addr)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct page *page = grab_meta_page(sbi, blk_addr);
	struct f2fs_summary_block *src = curseg->sum_blk;
	struct f2fs_summary_block *dst;

	dst = (struct f2fs_summary_block *)page_address(page);

	mutex_lock(&curseg->curseg_mutex);

	down_read(&curseg->journal_rwsem);
	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
	up_read(&curseg->journal_rwsem);

	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);

	mutex_unlock(&curseg->curseg_mutex);

	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

/*
 * Find a new segment in the free segment bitmap, following the requested
 * allocation direction. This function must succeed, otherwise BUG.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = GET_SEG_FROM_SEC(sbi, secno);
	zoneno = GET_ZONE_FROM_SEC(sbi, secno);

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;

	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));
	if (IS_DATASEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, type, curseg->segno, modified);
}

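/*
 * Choose the hint segment from which the next free segment search starts.
 * With one segment per section, hot data and node logs always search from the
 * beginning of the main area, while other data logs continue from the last
 * victim selected with ALLOC_NEXT (falling back to the current segment);
 * otherwise the current segment is kept as the hint.
 */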
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
	/* if segs_per_sec is larger than 1, we need to keep original policy. */
	if (sbi->segs_per_sec != 1)
		return CURSEG_I(sbi, type)->segno;

	if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
		return 0;

	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
	return CURSEG_I(sbi, type)->segno;
}

/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	int dir = ALLOC_LEFT;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, segno));
	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
		dir = ALLOC_RIGHT;

	if (test_opt(sbi, NOHEAP))
		dir = ALLOC_RIGHT;

	segno = __get_next_segno(sbi, type);
	get_new_segment(sbi, &segno, new_sec, dir);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
}

static void __next_free_blkoff(struct f2fs_sb_info *sbi,
			struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	unsigned long *target_map = SIT_I(sbi)->tmp_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	int i, pos;

	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
}

/*
 * If a segment is written in LFS manner, the next block offset is just
 * obtained by increasing the current block offset. However, if a segment is
 * written in SSR manner, the next block offset is obtained by calling
 * __next_free_blkoff.
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type == SSR)
		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
	else
		seg->next_blkoff++;
}

/*
 * This function always allocates a used segment (from the dirty seglist) in
 * SSR manner, so it should recover the existing segment information of
 * valid blocks.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	__next_free_blkoff(sbi, curseg, 0);

	if (reuse) {
		sum_page = get_sum_page(sbi, new_segno);
		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
		f2fs_put_page(sum_page, 1);
	}
}

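/*
 * Pick a partially valid segment that can be reused for SSR allocation: first
 * ask the victim selector for a segment of the requested log type, then fall
 * back to the other data or node logs, scanning from cold to hot (or hot to
 * cold) depending on the requested type.
 */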
static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
	unsigned segno = NULL_SEGNO;
	int i, cnt;
	bool reversed = false;

	/* need_SSR() already forces to do this */
	if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
		curseg->next_segno = segno;
		return 1;
	}

	/* For node segments, let's do SSR more intensively */
	if (IS_NODESEG(type)) {
		if (type >= CURSEG_WARM_NODE) {
			reversed = true;
			i = CURSEG_COLD_NODE;
		} else {
			i = CURSEG_HOT_NODE;
		}
		cnt = NR_CURSEG_NODE_TYPE;
	} else {
		if (type >= CURSEG_WARM_DATA) {
			reversed = true;
			i = CURSEG_COLD_DATA;
		} else {
			i = CURSEG_HOT_DATA;
		}
		cnt = NR_CURSEG_DATA_TYPE;
	}

	for (; cnt-- > 0; reversed ? i-- : i++) {
		if (i == type)
			continue;
		if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
			curseg->next_segno = segno;
			return 1;
		}
	}
	return 0;
}

/*
 * Flush out the current segment and replace it with a new one.
 * This function must succeed, otherwise BUG.
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (force)
		new_curseg(sbi, type, true);
	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
					type == CURSEG_WARM_NODE)
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
		new_curseg(sbi, type, false);
	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
		change_curseg(sbi, type, true);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}

void allocate_new_segments(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg;
	unsigned int old_segno;
	int i;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		curseg = CURSEG_I(sbi, i);
		old_segno = curseg->segno;
		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
		locate_dirty_segment(sbi, old_segno);
	}
}

static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};

bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	__u64 trim_start = cpc->trim_start;
	bool has_candidate = false;

	mutex_lock(&SIT_I(sbi)->sentry_lock);
	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
		if (add_discard_addrs(sbi, cpc, true)) {
			has_candidate = true;
			break;
		}
	}
	mutex_unlock(&SIT_I(sbi)->sentry_lock);

	cpc->trim_start = trim_start;
	return has_candidate;
}

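/*
 * FITRIM entry point: walk the requested block range in batches of
 * BATCHED_TRIM_SEGMENTS, triggering a checkpoint with CP_DISCARD for each
 * batch so that the prefree segments in it are discarded safely.
 */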
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	struct cp_control cpc;
	int err = 0;

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	cpc.trimmed = 0;
	if (end <= MAIN_BLKADDR(sbi))
		goto out;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"Found FS corruption, run fsck to fix.");
		goto out;
	}

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));

	/* do checkpoint to issue discard commands safely */
	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
		cpc.trim_start = start_segno;

		if (sbi->discard_blks == 0)
			break;
		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
			cpc.trim_end = end_segno;
		else
			cpc.trim_end = min_t(unsigned int,
				rounddown(start_segno +
				BATCHED_TRIM_SEGMENTS(sbi),
				sbi->segs_per_sec) - 1, end_segno);

		mutex_lock(&sbi->gc_mutex);
		err = write_checkpoint(sbi, &cpc);
		mutex_unlock(&sbi->gc_mutex);
		if (err)
			break;

		schedule();
	}
out:
	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
	return err;
}

static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	if (curseg->next_blkoff < sbi->blocks_per_seg)
		return true;
	return false;
}

static int __get_segment_type_2(struct page *page, enum page_type p_type)
{
	if (p_type == DATA)
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

static int __get_segment_type_4(struct page *page, enum page_type p_type)
{
	if (p_type == DATA) {
		struct inode *inode = page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
		if (IS_DNODE(page) && is_cold_node(page))
			return CURSEG_WARM_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type_6(struct page *page, enum page_type p_type)
{
	if (p_type == DATA) {
		struct inode *inode = page->mapping->host;

		if (is_cold_data(page) || file_is_cold(inode))
			return CURSEG_COLD_DATA;
		if (is_inode_flag_set(inode, FI_HOT_DATA))
			return CURSEG_HOT_DATA;
		return CURSEG_WARM_DATA;
	} else {
		if (IS_DNODE(page))
			return is_cold_node(page) ? CURSEG_WARM_NODE :
						CURSEG_HOT_NODE;
		return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type(struct page *page, enum page_type p_type)
{
	switch (F2FS_P_SB(page)->active_logs) {
	case 2:
		return __get_segment_type_2(page, p_type);
	case 4:
		return __get_segment_type_4(page, p_type);
	}
	/* NR_CURSEG_TYPE(6) logs by default */
	f2fs_bug_on(F2FS_P_SB(page),
		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
	return __get_segment_type_6(page, p_type);
}

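/*
 * Reserve the next free block of the current segment for the given log type,
 * record its summary entry, and advance (or reallocate) the current segment.
 * SIT bookkeeping is refreshed after the allocation so that dirty segments
 * stay accurate under SSR.
 */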
2118 */ 2119 __add_sum_entry(sbi, type, sum); 2120 2121 __refresh_next_blkoff(sbi, curseg); 2122 2123 stat_inc_block_count(sbi, curseg); 2124 2125 if (!__has_curseg_space(sbi, type)) 2126 sit_i->s_ops->allocate_segment(sbi, type, false); 2127 /* 2128 * SIT information should be updated after segment allocation, 2129 * since we need to keep dirty segments precisely under SSR. 2130 */ 2131 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 2132 2133 mutex_unlock(&sit_i->sentry_lock); 2134 2135 if (page && IS_NODESEG(type)) 2136 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); 2137 2138 mutex_unlock(&curseg->curseg_mutex); 2139 } 2140 2141 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) 2142 { 2143 int type = __get_segment_type(fio->page, fio->type); 2144 int err; 2145 2146 if (fio->type == NODE || fio->type == DATA) 2147 mutex_lock(&fio->sbi->wio_mutex[fio->type]); 2148 reallocate: 2149 allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, 2150 &fio->new_blkaddr, sum, type); 2151 2152 /* writeout dirty page into bdev */ 2153 err = f2fs_submit_page_mbio(fio); 2154 if (err == -EAGAIN) { 2155 fio->old_blkaddr = fio->new_blkaddr; 2156 goto reallocate; 2157 } 2158 2159 if (fio->type == NODE || fio->type == DATA) 2160 mutex_unlock(&fio->sbi->wio_mutex[fio->type]); 2161 } 2162 2163 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) 2164 { 2165 struct f2fs_io_info fio = { 2166 .sbi = sbi, 2167 .type = META, 2168 .op = REQ_OP_WRITE, 2169 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, 2170 .old_blkaddr = page->index, 2171 .new_blkaddr = page->index, 2172 .page = page, 2173 .encrypted_page = NULL, 2174 }; 2175 2176 if (unlikely(page->index >= MAIN_BLKADDR(sbi))) 2177 fio.op_flags &= ~REQ_META; 2178 2179 set_page_writeback(page); 2180 f2fs_submit_page_mbio(&fio); 2181 } 2182 2183 void write_node_page(unsigned int nid, struct f2fs_io_info *fio) 2184 { 2185 struct f2fs_summary sum; 2186 2187 set_summary(&sum, nid, 0, 0); 2188 do_write_page(&sum, fio); 2189 } 2190 2191 void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) 2192 { 2193 struct f2fs_sb_info *sbi = fio->sbi; 2194 struct f2fs_summary sum; 2195 struct node_info ni; 2196 2197 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); 2198 get_node_info(sbi, dn->nid, &ni); 2199 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 2200 do_write_page(&sum, fio); 2201 f2fs_update_data_blkaddr(dn, fio->new_blkaddr); 2202 } 2203 2204 int rewrite_data_page(struct f2fs_io_info *fio) 2205 { 2206 fio->new_blkaddr = fio->old_blkaddr; 2207 stat_inc_inplace_blocks(fio->sbi); 2208 return f2fs_submit_page_bio(fio); 2209 } 2210 2211 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 2212 block_t old_blkaddr, block_t new_blkaddr, 2213 bool recover_curseg, bool recover_newaddr) 2214 { 2215 struct sit_info *sit_i = SIT_I(sbi); 2216 struct curseg_info *curseg; 2217 unsigned int segno, old_cursegno; 2218 struct seg_entry *se; 2219 int type; 2220 unsigned short old_blkoff; 2221 2222 segno = GET_SEGNO(sbi, new_blkaddr); 2223 se = get_seg_entry(sbi, segno); 2224 type = se->type; 2225 2226 if (!recover_curseg) { 2227 /* for recovery flow */ 2228 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { 2229 if (old_blkaddr == NULL_ADDR) 2230 type = CURSEG_COLD_DATA; 2231 else 2232 type = CURSEG_WARM_DATA; 2233 } 2234 } else { 2235 if (!IS_CURSEG(sbi, segno)) 2236 type = CURSEG_WARM_DATA; 2237 } 2238 2239 curseg = CURSEG_I(sbi, type); 2240 2241 
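	/*
	 * Lock ordering here mirrors allocate_data_block() above:
	 * curseg_mutex is taken before sentry_lock.
	 */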
mutex_lock(&curseg->curseg_mutex); 2242 mutex_lock(&sit_i->sentry_lock); 2243 2244 old_cursegno = curseg->segno; 2245 old_blkoff = curseg->next_blkoff; 2246 2247 /* change the current segment */ 2248 if (segno != curseg->segno) { 2249 curseg->next_segno = segno; 2250 change_curseg(sbi, type, true); 2251 } 2252 2253 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); 2254 __add_sum_entry(sbi, type, sum); 2255 2256 if (!recover_curseg || recover_newaddr) 2257 update_sit_entry(sbi, new_blkaddr, 1); 2258 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 2259 update_sit_entry(sbi, old_blkaddr, -1); 2260 2261 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 2262 locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); 2263 2264 locate_dirty_segment(sbi, old_cursegno); 2265 2266 if (recover_curseg) { 2267 if (old_cursegno != curseg->segno) { 2268 curseg->next_segno = old_cursegno; 2269 change_curseg(sbi, type, true); 2270 } 2271 curseg->next_blkoff = old_blkoff; 2272 } 2273 2274 mutex_unlock(&sit_i->sentry_lock); 2275 mutex_unlock(&curseg->curseg_mutex); 2276 } 2277 2278 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, 2279 block_t old_addr, block_t new_addr, 2280 unsigned char version, bool recover_curseg, 2281 bool recover_newaddr) 2282 { 2283 struct f2fs_summary sum; 2284 2285 set_summary(&sum, dn->nid, dn->ofs_in_node, version); 2286 2287 __f2fs_replace_block(sbi, &sum, old_addr, new_addr, 2288 recover_curseg, recover_newaddr); 2289 2290 f2fs_update_data_blkaddr(dn, new_addr); 2291 } 2292 2293 void f2fs_wait_on_page_writeback(struct page *page, 2294 enum page_type type, bool ordered) 2295 { 2296 if (PageWriteback(page)) { 2297 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 2298 2299 f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 2300 0, page->index, type, WRITE); 2301 if (ordered) 2302 wait_on_page_writeback(page); 2303 else 2304 wait_for_stable_page(page); 2305 } 2306 } 2307 2308 void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, 2309 block_t blkaddr) 2310 { 2311 struct page *cpage; 2312 2313 if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) 2314 return; 2315 2316 cpage = find_lock_page(META_MAPPING(sbi), blkaddr); 2317 if (cpage) { 2318 f2fs_wait_on_page_writeback(cpage, DATA, true); 2319 f2fs_put_page(cpage, 1); 2320 } 2321 } 2322 2323 static int read_compacted_summaries(struct f2fs_sb_info *sbi) 2324 { 2325 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 2326 struct curseg_info *seg_i; 2327 unsigned char *kaddr; 2328 struct page *page; 2329 block_t start; 2330 int i, j, offset; 2331 2332 start = start_sum_block(sbi); 2333 2334 page = get_meta_page(sbi, start++); 2335 kaddr = (unsigned char *)page_address(page); 2336 2337 /* Step 1: restore nat cache */ 2338 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); 2339 memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE); 2340 2341 /* Step 2: restore sit cache */ 2342 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); 2343 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE); 2344 offset = 2 * SUM_JOURNAL_SIZE; 2345 2346 /* Step 3: restore summary entries */ 2347 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 2348 unsigned short blk_off; 2349 unsigned int segno; 2350 2351 seg_i = CURSEG_I(sbi, i); 2352 segno = le32_to_cpu(ckpt->cur_data_segno[i]); 2353 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); 2354 seg_i->next_segno = segno; 2355 reset_curseg(sbi, i, 0); 2356 seg_i->alloc_type = ckpt->alloc_type[i]; 2357 seg_i->next_blkoff = blk_off; 2358 2359 if (seg_i->alloc_type == SSR) 2360 
blk_off = sbi->blocks_per_seg; 2361 2362 for (j = 0; j < blk_off; j++) { 2363 struct f2fs_summary *s; 2364 s = (struct f2fs_summary *)(kaddr + offset); 2365 seg_i->sum_blk->entries[j] = *s; 2366 offset += SUMMARY_SIZE; 2367 if (offset + SUMMARY_SIZE <= PAGE_SIZE - 2368 SUM_FOOTER_SIZE) 2369 continue; 2370 2371 f2fs_put_page(page, 1); 2372 page = NULL; 2373 2374 page = get_meta_page(sbi, start++); 2375 kaddr = (unsigned char *)page_address(page); 2376 offset = 0; 2377 } 2378 } 2379 f2fs_put_page(page, 1); 2380 return 0; 2381 } 2382 2383 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) 2384 { 2385 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 2386 struct f2fs_summary_block *sum; 2387 struct curseg_info *curseg; 2388 struct page *new; 2389 unsigned short blk_off; 2390 unsigned int segno = 0; 2391 block_t blk_addr = 0; 2392 2393 /* get segment number and block addr */ 2394 if (IS_DATASEG(type)) { 2395 segno = le32_to_cpu(ckpt->cur_data_segno[type]); 2396 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - 2397 CURSEG_HOT_DATA]); 2398 if (__exist_node_summaries(sbi)) 2399 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); 2400 else 2401 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); 2402 } else { 2403 segno = le32_to_cpu(ckpt->cur_node_segno[type - 2404 CURSEG_HOT_NODE]); 2405 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - 2406 CURSEG_HOT_NODE]); 2407 if (__exist_node_summaries(sbi)) 2408 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, 2409 type - CURSEG_HOT_NODE); 2410 else 2411 blk_addr = GET_SUM_BLOCK(sbi, segno); 2412 } 2413 2414 new = get_meta_page(sbi, blk_addr); 2415 sum = (struct f2fs_summary_block *)page_address(new); 2416 2417 if (IS_NODESEG(type)) { 2418 if (__exist_node_summaries(sbi)) { 2419 struct f2fs_summary *ns = &sum->entries[0]; 2420 int i; 2421 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { 2422 ns->version = 0; 2423 ns->ofs_in_node = 0; 2424 } 2425 } else { 2426 int err; 2427 2428 err = restore_node_summary(sbi, segno, sum); 2429 if (err) { 2430 f2fs_put_page(new, 1); 2431 return err; 2432 } 2433 } 2434 } 2435 2436 /* set uncompleted segment to curseg */ 2437 curseg = CURSEG_I(sbi, type); 2438 mutex_lock(&curseg->curseg_mutex); 2439 2440 /* update journal info */ 2441 down_write(&curseg->journal_rwsem); 2442 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE); 2443 up_write(&curseg->journal_rwsem); 2444 2445 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE); 2446 memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE); 2447 curseg->next_segno = segno; 2448 reset_curseg(sbi, type, 0); 2449 curseg->alloc_type = ckpt->alloc_type[type]; 2450 curseg->next_blkoff = blk_off; 2451 mutex_unlock(&curseg->curseg_mutex); 2452 f2fs_put_page(new, 1); 2453 return 0; 2454 } 2455 2456 static int restore_curseg_summaries(struct f2fs_sb_info *sbi) 2457 { 2458 int type = CURSEG_HOT_DATA; 2459 int err; 2460 2461 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { 2462 int npages = npages_for_summary_flush(sbi, true); 2463 2464 if (npages >= 2) 2465 ra_meta_pages(sbi, start_sum_block(sbi), npages, 2466 META_CP, true); 2467 2468 /* restore for compacted data summary */ 2469 if (read_compacted_summaries(sbi)) 2470 return -EINVAL; 2471 type = CURSEG_HOT_NODE; 2472 } 2473 2474 if (__exist_node_summaries(sbi)) 2475 ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), 2476 NR_CURSEG_TYPE - type, META_CP, true); 2477 2478 for (; type <= CURSEG_COLD_NODE; type++) { 2479 err = read_normal_summaries(sbi, type); 2480 if (err) 2481 
return err; 2482 } 2483 2484 return 0; 2485 } 2486 2487 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) 2488 { 2489 struct page *page; 2490 unsigned char *kaddr; 2491 struct f2fs_summary *summary; 2492 struct curseg_info *seg_i; 2493 int written_size = 0; 2494 int i, j; 2495 2496 page = grab_meta_page(sbi, blkaddr++); 2497 kaddr = (unsigned char *)page_address(page); 2498 2499 /* Step 1: write nat cache */ 2500 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); 2501 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE); 2502 written_size += SUM_JOURNAL_SIZE; 2503 2504 /* Step 2: write sit cache */ 2505 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); 2506 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE); 2507 written_size += SUM_JOURNAL_SIZE; 2508 2509 /* Step 3: write summary entries */ 2510 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 2511 unsigned short blkoff; 2512 seg_i = CURSEG_I(sbi, i); 2513 if (sbi->ckpt->alloc_type[i] == SSR) 2514 blkoff = sbi->blocks_per_seg; 2515 else 2516 blkoff = curseg_blkoff(sbi, i); 2517 2518 for (j = 0; j < blkoff; j++) { 2519 if (!page) { 2520 page = grab_meta_page(sbi, blkaddr++); 2521 kaddr = (unsigned char *)page_address(page); 2522 written_size = 0; 2523 } 2524 summary = (struct f2fs_summary *)(kaddr + written_size); 2525 *summary = seg_i->sum_blk->entries[j]; 2526 written_size += SUMMARY_SIZE; 2527 2528 if (written_size + SUMMARY_SIZE <= PAGE_SIZE - 2529 SUM_FOOTER_SIZE) 2530 continue; 2531 2532 set_page_dirty(page); 2533 f2fs_put_page(page, 1); 2534 page = NULL; 2535 } 2536 } 2537 if (page) { 2538 set_page_dirty(page); 2539 f2fs_put_page(page, 1); 2540 } 2541 } 2542 2543 static void write_normal_summaries(struct f2fs_sb_info *sbi, 2544 block_t blkaddr, int type) 2545 { 2546 int i, end; 2547 if (IS_DATASEG(type)) 2548 end = type + NR_CURSEG_DATA_TYPE; 2549 else 2550 end = type + NR_CURSEG_NODE_TYPE; 2551 2552 for (i = type; i < end; i++) 2553 write_current_sum_page(sbi, i, blkaddr + (i - type)); 2554 } 2555 2556 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 2557 { 2558 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) 2559 write_compacted_summaries(sbi, start_blk); 2560 else 2561 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); 2562 } 2563 2564 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 2565 { 2566 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); 2567 } 2568 2569 int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, 2570 unsigned int val, int alloc) 2571 { 2572 int i; 2573 2574 if (type == NAT_JOURNAL) { 2575 for (i = 0; i < nats_in_cursum(journal); i++) { 2576 if (le32_to_cpu(nid_in_journal(journal, i)) == val) 2577 return i; 2578 } 2579 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL)) 2580 return update_nats_in_cursum(journal, 1); 2581 } else if (type == SIT_JOURNAL) { 2582 for (i = 0; i < sits_in_cursum(journal); i++) 2583 if (le32_to_cpu(segno_in_journal(journal, i)) == val) 2584 return i; 2585 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL)) 2586 return update_sits_in_cursum(journal, 1); 2587 } 2588 return -1; 2589 } 2590 2591 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, 2592 unsigned int segno) 2593 { 2594 return get_meta_page(sbi, current_sit_addr(sbi, segno)); 2595 } 2596 2597 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, 2598 unsigned int start) 2599 { 2600 struct sit_info *sit_i = SIT_I(sbi); 2601 struct page *src_page, *dst_page; 2602 pgoff_t src_off, dst_off; 2603 
void *src_addr, *dst_addr; 2604 2605 src_off = current_sit_addr(sbi, start); 2606 dst_off = next_sit_addr(sbi, src_off); 2607 2608 /* get current sit block page without lock */ 2609 src_page = get_meta_page(sbi, src_off); 2610 dst_page = grab_meta_page(sbi, dst_off); 2611 f2fs_bug_on(sbi, PageDirty(src_page)); 2612 2613 src_addr = page_address(src_page); 2614 dst_addr = page_address(dst_page); 2615 memcpy(dst_addr, src_addr, PAGE_SIZE); 2616 2617 set_page_dirty(dst_page); 2618 f2fs_put_page(src_page, 1); 2619 2620 set_to_next_sit(sit_i, start); 2621 2622 return dst_page; 2623 } 2624 2625 static struct sit_entry_set *grab_sit_entry_set(void) 2626 { 2627 struct sit_entry_set *ses = 2628 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS); 2629 2630 ses->entry_cnt = 0; 2631 INIT_LIST_HEAD(&ses->set_list); 2632 return ses; 2633 } 2634 2635 static void release_sit_entry_set(struct sit_entry_set *ses) 2636 { 2637 list_del(&ses->set_list); 2638 kmem_cache_free(sit_entry_set_slab, ses); 2639 } 2640 2641 static void adjust_sit_entry_set(struct sit_entry_set *ses, 2642 struct list_head *head) 2643 { 2644 struct sit_entry_set *next = ses; 2645 2646 if (list_is_last(&ses->set_list, head)) 2647 return; 2648 2649 list_for_each_entry_continue(next, head, set_list) 2650 if (ses->entry_cnt <= next->entry_cnt) 2651 break; 2652 2653 list_move_tail(&ses->set_list, &next->set_list); 2654 } 2655 2656 static void add_sit_entry(unsigned int segno, struct list_head *head) 2657 { 2658 struct sit_entry_set *ses; 2659 unsigned int start_segno = START_SEGNO(segno); 2660 2661 list_for_each_entry(ses, head, set_list) { 2662 if (ses->start_segno == start_segno) { 2663 ses->entry_cnt++; 2664 adjust_sit_entry_set(ses, head); 2665 return; 2666 } 2667 } 2668 2669 ses = grab_sit_entry_set(); 2670 2671 ses->start_segno = start_segno; 2672 ses->entry_cnt++; 2673 list_add(&ses->set_list, head); 2674 } 2675 2676 static void add_sits_in_set(struct f2fs_sb_info *sbi) 2677 { 2678 struct f2fs_sm_info *sm_info = SM_I(sbi); 2679 struct list_head *set_list = &sm_info->sit_entry_set; 2680 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; 2681 unsigned int segno; 2682 2683 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) 2684 add_sit_entry(segno, set_list); 2685 } 2686 2687 static void remove_sits_in_journal(struct f2fs_sb_info *sbi) 2688 { 2689 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 2690 struct f2fs_journal *journal = curseg->journal; 2691 int i; 2692 2693 down_write(&curseg->journal_rwsem); 2694 for (i = 0; i < sits_in_cursum(journal); i++) { 2695 unsigned int segno; 2696 bool dirtied; 2697 2698 segno = le32_to_cpu(segno_in_journal(journal, i)); 2699 dirtied = __mark_sit_entry_dirty(sbi, segno); 2700 2701 if (!dirtied) 2702 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); 2703 } 2704 update_sits_in_cursum(journal, -i); 2705 up_write(&curseg->journal_rwsem); 2706 } 2707 2708 /* 2709 * CP calls this function, which flushes SIT entries including sit_journal, 2710 * and moves prefree segs to free segs. 
 */
void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = true;
	struct seg_entry *se;

	mutex_lock(&sit_i->sentry_lock);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * add and account the sit entries in the dirty bitmap to the
	 * sit entry sets temporarily
	 */
	add_sits_in_set(sbi);

	/*
	 * if there is not enough space in the journal to store dirty sit
	 * entries, remove all entries from the journal and add and account
	 * them in the sit entry set.
	 */
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
		remove_sits_in_journal(sbi);

	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
		struct page *page = NULL;
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
						(unsigned long)MAIN_SEGS(sbi));
		unsigned int segno = start_segno;

		if (to_journal &&
			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
			to_journal = false;

		if (to_journal) {
			down_write(&curseg->journal_rwsem);
		} else {
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
		}

		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;

			se = get_seg_entry(sbi, segno);

			/* add discard candidates */
			if (!(cpc->reason & CP_DISCARD)) {
				cpc->trim_start = segno;
				add_discard_addrs(sbi, cpc, false);
			}

			if (to_journal) {
				offset = lookup_journal_in_cursum(journal,
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
				segno_in_journal(journal, offset) =
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
					&sit_in_journal(journal, offset));
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
			}

			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
		}

		if (to_journal)
			up_write(&curseg->journal_rwsem);
		else
			f2fs_put_page(page, 1);

		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
	}

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
	if (cpc->reason & CP_DISCARD) {
		__u64 trim_start = cpc->trim_start;

		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
			add_discard_addrs(sbi, cpc, false);

		cpc->trim_start = trim_start;
	}
	mutex_unlock(&sit_i->sentry_lock);

	set_prefree_as_free_segments(sbi);
}

static int build_sit_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct sit_info *sit_i;
	unsigned int sit_segs, start;
	char *src_bitmap;
	unsigned int bitmap_size;

	/* allocate memory for SIT information */
	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

	sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) *
					sizeof(struct seg_entry), GFP_KERNEL);
	if (!sit_i->sentries)
		return -ENOMEM;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL);
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		sit_i->sentries[start].cur_valid_map
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		sit_i->sentries[start].ckpt_valid_map
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		if (!sit_i->sentries[start].cur_valid_map ||
				!sit_i->sentries[start].ckpt_valid_map)
			return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
		sit_i->sentries[start].cur_valid_map_mir
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		if (!sit_i->sentries[start].cur_valid_map_mir)
			return -ENOMEM;
#endif

		if (f2fs_discard_en(sbi)) {
			sit_i->sentries[start].discard_map
				= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
			if (!sit_i->sentries[start].discard_map)
				return -ENOMEM;
		}
	}

	sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
	if (!sit_i->tmp_map)
		return -ENOMEM;

	if (sbi->segs_per_sec > 1) {
		sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) *
					sizeof(struct sec_entry), GFP_KERNEL);
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related to SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* set up SIT bitmap from the checkpoint pack */
	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

	sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap)
		return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
	sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap_mir)
		return -ENOMEM;
#endif

	/* init SIT information */
	sit_i->s_ops = &default_salloc_ops;

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
	sit_i->written_valid_blocks = 0;
	sit_i->bitmap_size = bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
	sit_i->mounted_time = ktime_get_real_seconds();
	mutex_init(&sit_i->sentry_lock);
	return 0;
}

static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL);
	if (!free_i->free_segmap)
		return -ENOMEM;

	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
	free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL);
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* set all segments as dirty temporarily */
	memset(free_i->free_segmap, 0xff, bitmap_size);
memset(free_i->free_secmap, 0xff, sec_bitmap_size); 2939 2940 /* init free segmap information */ 2941 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); 2942 free_i->free_segments = 0; 2943 free_i->free_sections = 0; 2944 spin_lock_init(&free_i->segmap_lock); 2945 return 0; 2946 } 2947 2948 static int build_curseg(struct f2fs_sb_info *sbi) 2949 { 2950 struct curseg_info *array; 2951 int i; 2952 2953 array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL); 2954 if (!array) 2955 return -ENOMEM; 2956 2957 SM_I(sbi)->curseg_array = array; 2958 2959 for (i = 0; i < NR_CURSEG_TYPE; i++) { 2960 mutex_init(&array[i].curseg_mutex); 2961 array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL); 2962 if (!array[i].sum_blk) 2963 return -ENOMEM; 2964 init_rwsem(&array[i].journal_rwsem); 2965 array[i].journal = kzalloc(sizeof(struct f2fs_journal), 2966 GFP_KERNEL); 2967 if (!array[i].journal) 2968 return -ENOMEM; 2969 array[i].segno = NULL_SEGNO; 2970 array[i].next_blkoff = 0; 2971 } 2972 return restore_curseg_summaries(sbi); 2973 } 2974 2975 static void build_sit_entries(struct f2fs_sb_info *sbi) 2976 { 2977 struct sit_info *sit_i = SIT_I(sbi); 2978 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 2979 struct f2fs_journal *journal = curseg->journal; 2980 struct seg_entry *se; 2981 struct f2fs_sit_entry sit; 2982 int sit_blk_cnt = SIT_BLK_CNT(sbi); 2983 unsigned int i, start, end; 2984 unsigned int readed, start_blk = 0; 2985 2986 do { 2987 readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, 2988 META_SIT, true); 2989 2990 start = start_blk * sit_i->sents_per_block; 2991 end = (start_blk + readed) * sit_i->sents_per_block; 2992 2993 for (; start < end && start < MAIN_SEGS(sbi); start++) { 2994 struct f2fs_sit_block *sit_blk; 2995 struct page *page; 2996 2997 se = &sit_i->sentries[start]; 2998 page = get_current_sit_page(sbi, start); 2999 sit_blk = (struct f2fs_sit_block *)page_address(page); 3000 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; 3001 f2fs_put_page(page, 1); 3002 3003 check_block_count(sbi, start, &sit); 3004 seg_info_from_raw_sit(se, &sit); 3005 3006 /* build discard map only one time */ 3007 if (f2fs_discard_en(sbi)) { 3008 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 3009 memset(se->discard_map, 0xff, 3010 SIT_VBLOCK_MAP_SIZE); 3011 } else { 3012 memcpy(se->discard_map, 3013 se->cur_valid_map, 3014 SIT_VBLOCK_MAP_SIZE); 3015 sbi->discard_blks += 3016 sbi->blocks_per_seg - 3017 se->valid_blocks; 3018 } 3019 } 3020 3021 if (sbi->segs_per_sec > 1) 3022 get_sec_entry(sbi, start)->valid_blocks += 3023 se->valid_blocks; 3024 } 3025 start_blk += readed; 3026 } while (start_blk < sit_blk_cnt); 3027 3028 down_read(&curseg->journal_rwsem); 3029 for (i = 0; i < sits_in_cursum(journal); i++) { 3030 unsigned int old_valid_blocks; 3031 3032 start = le32_to_cpu(segno_in_journal(journal, i)); 3033 se = &sit_i->sentries[start]; 3034 sit = sit_in_journal(journal, i); 3035 3036 old_valid_blocks = se->valid_blocks; 3037 3038 check_block_count(sbi, start, &sit); 3039 seg_info_from_raw_sit(se, &sit); 3040 3041 if (f2fs_discard_en(sbi)) { 3042 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 3043 memset(se->discard_map, 0xff, 3044 SIT_VBLOCK_MAP_SIZE); 3045 } else { 3046 memcpy(se->discard_map, se->cur_valid_map, 3047 SIT_VBLOCK_MAP_SIZE); 3048 sbi->discard_blks += old_valid_blocks - 3049 se->valid_blocks; 3050 } 3051 } 3052 3053 if (sbi->segs_per_sec > 1) 3054 get_sec_entry(sbi, start)->valid_blocks += 3055 se->valid_blocks - old_valid_blocks; 3056 } 3057 
	up_read(&curseg->journal_rwsem);
}

static void init_free_segmap(struct f2fs_sb_info *sbi)
{
	unsigned int start;
	int type;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		struct seg_entry *sentry = get_seg_entry(sbi, start);
		if (!sentry->valid_blocks)
			__set_free(sbi, start);
		else
			SIT_I(sbi)->written_valid_blocks +=
						sentry->valid_blocks;
	}

	/* mark the current segments as in use */
	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
		__set_test_and_inuse(sbi, curseg_t->segno);
	}
}

static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno = 0, offset = 0;
	unsigned short valid_blocks;

	while (1) {
		/* find dirty segment based on free segmap */
		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
		if (segno >= MAIN_SEGS(sbi))
			break;
		offset = segno + 1;
		valid_blocks = get_valid_blocks(sbi, segno, false);
		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
			continue;
		if (valid_blocks > sbi->blocks_per_seg) {
			f2fs_bug_on(sbi, 1);
			continue;
		}
		mutex_lock(&dirty_i->seglist_lock);
		__locate_dirty_segment(sbi, segno, DIRTY);
		mutex_unlock(&dirty_i->seglist_lock);
	}
}

static int init_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));

	dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL);
	if (!dirty_i->victim_secmap)
		return -ENOMEM;
	return 0;
}

static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i;
	unsigned int bitmap_size, i;

	/* allocate memory for dirty segments list information */
	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
	if (!dirty_i)
		return -ENOMEM;

	SM_I(sbi)->dirty_info = dirty_i;
	mutex_init(&dirty_i->seglist_lock);

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));

	for (i = 0; i < NR_DIRTY_TYPE; i++) {
		dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL);
		if (!dirty_i->dirty_segmap[i])
			return -ENOMEM;
	}

	init_dirty_segmap(sbi);
	return init_victim_secmap(sbi);
}

/*
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno;

	mutex_lock(&sit_i->sentry_lock);

	sit_i->min_mtime = LLONG_MAX;

	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
		unsigned int i;
		unsigned long long mtime = 0;

		for (i = 0; i < sbi->segs_per_sec; i++)
			mtime += get_seg_entry(sbi, segno + i)->mtime;

		mtime = div_u64(mtime, sbi->segs_per_sec);

		if (sit_i->min_mtime > mtime)
			sit_i->min_mtime = mtime;
	}
	sit_i->max_mtime = get_mtime(sbi);
	mutex_unlock(&sit_i->sentry_lock);
}

int build_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_sm_info *sm_info;
	int err;
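
	/*
	 * Note: the steps below depend on one another -- sm_info is filled
	 * from the superblock/checkpoint first, then SIT info, the free
	 * segmap and the current segments are built, and finally the free
	 * and dirty segmaps are (re)initialized from the loaded SIT entries.
	 */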
3177 3178 sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); 3179 if (!sm_info) 3180 return -ENOMEM; 3181 3182 /* init sm info */ 3183 sbi->sm_info = sm_info; 3184 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 3185 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); 3186 sm_info->segment_count = le32_to_cpu(raw_super->segment_count); 3187 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); 3188 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 3189 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 3190 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 3191 sm_info->rec_prefree_segments = sm_info->main_segments * 3192 DEF_RECLAIM_PREFREE_SEGMENTS / 100; 3193 if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) 3194 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; 3195 3196 if (!test_opt(sbi, LFS)) 3197 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; 3198 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 3199 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 3200 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; 3201 3202 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; 3203 3204 INIT_LIST_HEAD(&sm_info->sit_entry_set); 3205 3206 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { 3207 err = create_flush_cmd_control(sbi); 3208 if (err) 3209 return err; 3210 } 3211 3212 err = create_discard_cmd_control(sbi); 3213 if (err) 3214 return err; 3215 3216 err = build_sit_info(sbi); 3217 if (err) 3218 return err; 3219 err = build_free_segmap(sbi); 3220 if (err) 3221 return err; 3222 err = build_curseg(sbi); 3223 if (err) 3224 return err; 3225 3226 /* reinit free segmap based on SIT */ 3227 build_sit_entries(sbi); 3228 3229 init_free_segmap(sbi); 3230 err = build_dirty_segmap(sbi); 3231 if (err) 3232 return err; 3233 3234 init_min_max_mtime(sbi); 3235 return 0; 3236 } 3237 3238 static void discard_dirty_segmap(struct f2fs_sb_info *sbi, 3239 enum dirty_type dirty_type) 3240 { 3241 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 3242 3243 mutex_lock(&dirty_i->seglist_lock); 3244 kvfree(dirty_i->dirty_segmap[dirty_type]); 3245 dirty_i->nr_dirty[dirty_type] = 0; 3246 mutex_unlock(&dirty_i->seglist_lock); 3247 } 3248 3249 static void destroy_victim_secmap(struct f2fs_sb_info *sbi) 3250 { 3251 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 3252 kvfree(dirty_i->victim_secmap); 3253 } 3254 3255 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) 3256 { 3257 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 3258 int i; 3259 3260 if (!dirty_i) 3261 return; 3262 3263 /* discard pre-free/dirty segments list */ 3264 for (i = 0; i < NR_DIRTY_TYPE; i++) 3265 discard_dirty_segmap(sbi, i); 3266 3267 destroy_victim_secmap(sbi); 3268 SM_I(sbi)->dirty_info = NULL; 3269 kfree(dirty_i); 3270 } 3271 3272 static void destroy_curseg(struct f2fs_sb_info *sbi) 3273 { 3274 struct curseg_info *array = SM_I(sbi)->curseg_array; 3275 int i; 3276 3277 if (!array) 3278 return; 3279 SM_I(sbi)->curseg_array = NULL; 3280 for (i = 0; i < NR_CURSEG_TYPE; i++) { 3281 kfree(array[i].sum_blk); 3282 kfree(array[i].journal); 3283 } 3284 kfree(array); 3285 } 3286 3287 static void destroy_free_segmap(struct f2fs_sb_info *sbi) 3288 { 3289 struct free_segmap_info *free_i = SM_I(sbi)->free_info; 3290 if (!free_i) 3291 return; 3292 SM_I(sbi)->free_info = NULL; 3293 kvfree(free_i->free_segmap); 3294 kvfree(free_i->free_secmap); 3295 kfree(free_i); 3296 } 3297 3298 static void destroy_sit_info(struct 
f2fs_sb_info *sbi) 3299 { 3300 struct sit_info *sit_i = SIT_I(sbi); 3301 unsigned int start; 3302 3303 if (!sit_i) 3304 return; 3305 3306 if (sit_i->sentries) { 3307 for (start = 0; start < MAIN_SEGS(sbi); start++) { 3308 kfree(sit_i->sentries[start].cur_valid_map); 3309 #ifdef CONFIG_F2FS_CHECK_FS 3310 kfree(sit_i->sentries[start].cur_valid_map_mir); 3311 #endif 3312 kfree(sit_i->sentries[start].ckpt_valid_map); 3313 kfree(sit_i->sentries[start].discard_map); 3314 } 3315 } 3316 kfree(sit_i->tmp_map); 3317 3318 kvfree(sit_i->sentries); 3319 kvfree(sit_i->sec_entries); 3320 kvfree(sit_i->dirty_sentries_bitmap); 3321 3322 SM_I(sbi)->sit_info = NULL; 3323 kfree(sit_i->sit_bitmap); 3324 #ifdef CONFIG_F2FS_CHECK_FS 3325 kfree(sit_i->sit_bitmap_mir); 3326 #endif 3327 kfree(sit_i); 3328 } 3329 3330 void destroy_segment_manager(struct f2fs_sb_info *sbi) 3331 { 3332 struct f2fs_sm_info *sm_info = SM_I(sbi); 3333 3334 if (!sm_info) 3335 return; 3336 destroy_flush_cmd_control(sbi, true); 3337 destroy_discard_cmd_control(sbi); 3338 destroy_dirty_segmap(sbi); 3339 destroy_curseg(sbi); 3340 destroy_free_segmap(sbi); 3341 destroy_sit_info(sbi); 3342 sbi->sm_info = NULL; 3343 kfree(sm_info); 3344 } 3345 3346 int __init create_segment_manager_caches(void) 3347 { 3348 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 3349 sizeof(struct discard_entry)); 3350 if (!discard_entry_slab) 3351 goto fail; 3352 3353 discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd", 3354 sizeof(struct discard_cmd)); 3355 if (!discard_cmd_slab) 3356 goto destroy_discard_entry; 3357 3358 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", 3359 sizeof(struct sit_entry_set)); 3360 if (!sit_entry_set_slab) 3361 goto destroy_discard_cmd; 3362 3363 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", 3364 sizeof(struct inmem_pages)); 3365 if (!inmem_entry_slab) 3366 goto destroy_sit_entry_set; 3367 return 0; 3368 3369 destroy_sit_entry_set: 3370 kmem_cache_destroy(sit_entry_set_slab); 3371 destroy_discard_cmd: 3372 kmem_cache_destroy(discard_cmd_slab); 3373 destroy_discard_entry: 3374 kmem_cache_destroy(discard_entry_slab); 3375 fail: 3376 return -ENOMEM; 3377 } 3378 3379 void destroy_segment_manager_caches(void) 3380 { 3381 kmem_cache_destroy(sit_entry_set_slab); 3382 kmem_cache_destroy(discard_cmd_slab); 3383 kmem_cache_destroy(discard_entry_slab); 3384 kmem_cache_destroy(inmem_entry_slab); 3385 } 3386
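
/*
 * Illustrative sketch (not compiled): roughly how the entry points above
 * are expected to be paired by their callers.  The caller names here are
 * assumptions based on how f2fs wires these up elsewhere (super.c); this
 * is only a usage outline, not part of the segment manager itself.
 */
#if 0
static int __init example_init(void)
{
	/* slab caches are created once, at module load */
	return create_segment_manager_caches();
}

static int example_mount(struct f2fs_sb_info *sbi)
{
	/* per-superblock segment manager state is built during mount */
	return build_segment_manager(sbi);
}

static void example_umount(struct f2fs_sb_info *sbi)
{
	/* tear down per-superblock state at unmount */
	destroy_segment_manager(sbi);
}

static void __exit example_exit(void)
{
	/* caches go away with the module */
	destroy_segment_manager_caches();
}
#endif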