/*
 * fs/f2fs/node.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 * http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define on_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock)

static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;

bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct sysinfo val;
	unsigned long avail_ram;
	unsigned long mem_size = 0;
	bool res = false;

	si_meminfo(&val);

	/* only uses low memory */
	avail_ram = val.totalram - val.totalhigh;

	/*
	 * give 25%, 25%, 50%, 50%, 50% of memory to each component, respectively
	 */
	if (type == FREE_NIDS) {
		mem_size = (nm_i->nid_cnt[FREE_NID] *
				sizeof(struct free_nid)) >> PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
	} else if (type == NAT_ENTRIES) {
		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
							PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
		if (excess_cached_nats(sbi))
			res = false;
	} else if (type == DIRTY_DENTS) {
		if (sbi->sb->s_bdi->wb.dirty_exceeded)
			return false;
		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == INO_ENTRIES) {
		int i;

		for (i = 0; i < MAX_INO_ENTRY; i++)
			mem_size += sbi->im[i].ino_num *
						sizeof(struct ino_entry);
		mem_size >>= PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == EXTENT_CACHE) {
		mem_size = (atomic_read(&sbi->total_ext_tree) *
				sizeof(struct extent_tree) +
				atomic_read(&sbi->total_ext_node) *
				sizeof(struct extent_node)) >> PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == INMEM_PAGES) {
		/* it allows 20% / total_ram for inmemory pages */
		mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
		res = mem_size < (val.totalram / 5);
	} else {
		if (!sbi->sb->s_bdi->wb.dirty_exceeded)
			return true;
	}
	return res;
}

static void clear_node_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	unsigned long flags;

	if (PageDirty(page)) {
		spin_lock_irqsave(&mapping->tree_lock, flags);
		radix_tree_tag_clear(&mapping->page_tree,
				page_index(page),
				PAGECACHE_TAG_DIRTY);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);

		clear_page_dirty_for_io(page);
		dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
	}
	ClearPageUptodate(page);
}

static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	pgoff_t index = current_nat_addr(sbi, nid);
	return get_meta_page(sbi, index);
}
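/*
 * Roughly speaking, each NAT block has two on-disk slots (a ping-pong pair).
 * get_next_nat_page() below copies the currently valid slot into the other
 * one, dirties that copy, and flips the corresponding bit via
 * set_to_next_nat() so the next checkpoint writes the updated slot.
 */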
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *src_page;
	struct page *dst_page;
	pgoff_t src_off;
	pgoff_t dst_off;
	void *src_addr;
	void *dst_addr;
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	src_off = current_nat_addr(sbi, nid);
	dst_off = next_nat_addr(sbi, src_off);

	/* get current nat block page with lock */
	src_page = get_meta_page(sbi, src_off);
	dst_page = grab_meta_page(sbi, dst_off);
	f2fs_bug_on(sbi, PageDirty(src_page));

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_SIZE);
	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	set_to_next_nat(nm_i, nid);

	return dst_page;
}

static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
{
	struct nat_entry *new;

	if (no_fail)
		new = f2fs_kmem_cache_alloc(nat_entry_slab,
						GFP_NOFS | __GFP_ZERO);
	else
		new = kmem_cache_alloc(nat_entry_slab,
						GFP_NOFS | __GFP_ZERO);
	if (new) {
		nat_set_nid(new, nid);
		nat_reset_flag(new);
	}
	return new;
}

static void __free_nat_entry(struct nat_entry *e)
{
	kmem_cache_free(nat_entry_slab, e);
}

/* must be locked by nat_tree_lock */
static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
	struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
{
	if (no_fail)
		f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
	else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
		return NULL;

	if (raw_ne)
		node_info_from_raw_nat(&ne->ni, raw_ne);
	list_add_tail(&ne->list, &nm_i->nat_entries);
	nm_i->nat_cnt++;
	return ne;
}

static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
	return radix_tree_lookup(&nm_i->nat_root, n);
}

static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
}

static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
	list_del(&e->list);
	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
	nm_i->nat_cnt--;
	__free_nat_entry(e);
}

static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
						struct nat_entry *ne)
{
	nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
	struct nat_entry_set *head;

	head = radix_tree_lookup(&nm_i->nat_set_root, set);
	if (!head) {
		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);

		INIT_LIST_HEAD(&head->entry_list);
		INIT_LIST_HEAD(&head->set_list);
		head->set = set;
		head->entry_cnt = 0;
		f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
	}

	if (get_nat_flag(ne, IS_DIRTY))
		goto refresh_list;

	nm_i->dirty_nat_cnt++;
	head->entry_cnt++;
	set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
	if (nat_get_blkaddr(ne) == NEW_ADDR)
		list_del_init(&ne->list);
	else
		list_move_tail(&ne->list, &head->entry_list);
}

static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
		struct nat_entry_set *set, struct nat_entry *ne)
{
	list_move_tail(&ne->list, &nm_i->nat_entries);
	set_nat_flag(ne, IS_DIRTY, false);
	set->entry_cnt--;
	nm_i->dirty_nat_cnt--;
}

static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry_set **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
							start, nr);
}

int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool need = false;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		if (!get_nat_flag(e, IS_CHECKPOINTED) &&
				!get_nat_flag(e, HAS_FSYNCED_INODE))
			need = true;
	}
	up_read(&nm_i->nat_tree_lock);
	return need;
}

bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool is_cp = true;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
		is_cp = false;
	up_read(&nm_i->nat_tree_lock);
	return is_cp;
}

bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool need_update = true;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ino);
	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
			(get_nat_flag(e, IS_CHECKPOINTED) ||
			 get_nat_flag(e, HAS_FSYNCED_INODE)))
		need_update = false;
	up_read(&nm_i->nat_tree_lock);
	return need_update;
}

/* must be locked by nat_tree_lock */
static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
						struct f2fs_nat_entry *ne)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *new, *e;

	new = __alloc_nat_entry(nid, false);
	if (!new)
		return;

	down_write(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (!e)
		e = __init_nat_entry(nm_i, new, ne, false);
	else
		f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
				nat_get_blkaddr(e) !=
					le32_to_cpu(ne->block_addr) ||
				nat_get_version(e) != ne->version);
	up_write(&nm_i->nat_tree_lock);
	if (e != new)
		__free_nat_entry(new);
}

static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
			block_t new_blkaddr, bool fsync_done)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	struct nat_entry *new = __alloc_nat_entry(ni->nid, true);

	down_write(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ni->nid);
	if (!e) {
		e = __init_nat_entry(nm_i, new, NULL, true);
		copy_node_info(&e->ni, ni);
		f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
	} else if (new_blkaddr == NEW_ADDR) {
		/*
		 * when nid is reallocated,
		 * the previous nat entry can remain in the nat cache.
		 * So, reinitialize it with new information.
		 */
		copy_node_info(&e->ni, ni);
		f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
	}
	/* let's free early to reduce memory consumption */
	if (e != new)
		__free_nat_entry(new);

	/* sanity check */
	f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
			new_blkaddr == NULL_ADDR);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
			new_blkaddr == NEW_ADDR);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
			nat_get_blkaddr(e) != NULL_ADDR &&
			new_blkaddr == NEW_ADDR);

	/* increment version no as node is removed */
	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
		unsigned char version = nat_get_version(e);
		nat_set_version(e, inc_node_version(version));
	}

	/* change address */
	nat_set_blkaddr(e, new_blkaddr);
	if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
		set_nat_flag(e, IS_CHECKPOINTED, false);
	__set_nat_cache_dirty(nm_i, e);

	/* update fsync_mark if its inode nat entry is still alive */
	if (ni->nid != ni->ino)
		e = __lookup_nat_cache(nm_i, ni->ino);
	if (e) {
		if (fsync_done && ni->nid == ni->ino)
			set_nat_flag(e, HAS_FSYNCED_INODE, true);
		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
	}
	up_write(&nm_i->nat_tree_lock);
}

int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int nr = nr_shrink;

	if (!down_write_trylock(&nm_i->nat_tree_lock))
		return 0;

	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
		struct nat_entry *ne;
		ne = list_first_entry(&nm_i->nat_entries,
					struct nat_entry, list);
		__del_from_nat_cache(nm_i, ne);
		nr_shrink--;
	}
	up_write(&nm_i->nat_tree_lock);
	return nr - nr_shrink;
}

/*
 * This function always returns success
 */
void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	nid_t start_nid = START_NID(nid);
	struct f2fs_nat_block *nat_blk;
	struct page *page = NULL;
	struct f2fs_nat_entry ne;
	struct nat_entry *e;
	pgoff_t index;
	int i;

	ni->nid = nid;

	/* Check nat cache */
	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		ni->ino = nat_get_ino(e);
		ni->blk_addr = nat_get_blkaddr(e);
		ni->version = nat_get_version(e);
		up_read(&nm_i->nat_tree_lock);
		return;
	}

	memset(&ne, 0, sizeof(struct f2fs_nat_entry));

	/* Check current segment summary */
	down_read(&curseg->journal_rwsem);
	i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
	if (i >= 0) {
		ne = nat_in_journal(journal, i);
		node_info_from_raw_nat(ni, &ne);
	}
	up_read(&curseg->journal_rwsem);
	if (i >= 0) {
		up_read(&nm_i->nat_tree_lock);
		goto cache;
	}

	/* Fill node_info from nat page */
	index = current_nat_addr(sbi, nid);
	up_read(&nm_i->nat_tree_lock);

	page = get_meta_page(sbi, index);
	nat_blk = (struct f2fs_nat_block *)page_address(page);
	ne = nat_blk->entries[nid - start_nid];
	node_info_from_raw_nat(ni, &ne);
	f2fs_put_page(page, 1);
cache:
	/* cache nat entry */
	cache_nat_entry(sbi, nid, &ne);
}

/*
 * readahead MAX_RA_NODE number of node pages.
 */
static void ra_node_pages(struct page *parent, int start, int n)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	struct blk_plug plug;
	int i, end;
	nid_t nid;

	blk_start_plug(&plug);

	/* Then, try readahead for siblings of the desired node */
	end = start + n;
	end = min(end, NIDS_PER_BLOCK);
	for (i = start; i < end; i++) {
		nid = get_nid(parent, i, false);
		ra_node_page(sbi, nid);
	}

	blk_finish_plug(&plug);
}

pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
	const long direct_index = ADDRS_PER_INODE(dn->inode);
	const long direct_blks = ADDRS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	unsigned int skipped_unit = ADDRS_PER_BLOCK;
	int cur_level = dn->cur_level;
	int max_level = dn->max_level;
	pgoff_t base = 0;

	if (!dn->max_level)
		return pgofs + 1;

	while (max_level-- > cur_level)
		skipped_unit *= NIDS_PER_BLOCK;

	switch (dn->max_level) {
	case 3:
		base += 2 * indirect_blks;
	case 2:
		base += 2 * direct_blks;
	case 1:
		base += direct_index;
		break;
	default:
		f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
	}

	return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
}

/*
 * The maximum depth is four.
 * Offset[0] will have raw inode offset.
 */
static int get_node_path(struct inode *inode, long block,
				int offset[4], unsigned int noffset[4])
{
	const long direct_index = ADDRS_PER_INODE(inode);
	const long direct_blks = ADDRS_PER_BLOCK;
	const long dptrs_per_blk = NIDS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
	int n = 0;
	int level = 0;

	noffset[0] = 0;

	if (block < direct_index) {
		offset[n] = block;
		goto got;
	}
	block -= direct_index;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR1_BLOCK;
		noffset[n] = 1;
		offset[n] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR2_BLOCK;
		noffset[n] = 2;
		offset[n] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND1_BLOCK;
		noffset[n] = 3;
		offset[n++] = block / direct_blks;
		noffset[n] = 4 + offset[n - 1];
		offset[n] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND2_BLOCK;
		noffset[n] = 4 + dptrs_per_blk;
		offset[n++] = block / direct_blks;
		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
		offset[n] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < dindirect_blks) {
		offset[n++] = NODE_DIND_BLOCK;
		noffset[n] = 5 + (dptrs_per_blk * 2);
		offset[n++] = block / indirect_blks;
		noffset[n] = 6 + (dptrs_per_blk * 2) +
			      offset[n - 1] * (dptrs_per_blk + 1);
		offset[n++] = (block / direct_blks) % dptrs_per_blk;
		noffset[n] = 7 + (dptrs_per_blk * 2) +
			      offset[n - 2] * (dptrs_per_blk + 1) +
			      offset[n - 1];
		offset[n] = block % direct_blks;
		level = 3;
		goto got;
	} else {
		return -E2BIG;
	}
got:
	return level;
}
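/*
 * Example of get_node_path() above: for a block index just past the
 * in-inode range, i.e. direct_index <= block < direct_index + direct_blks,
 * it returns level 1 with offset[0] = NODE_DIR1_BLOCK and
 * offset[1] = block - direct_index, so the caller walks
 * inode -> first direct node -> data block.
 */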
/*
 * Caller should call f2fs_put_dnode(dn).
 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op() only if mode is not set to RDONLY_NODE.
 * In the case of RDONLY_NODE, we don't need to care about the mutex.
 */
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct page *npage[4];
	struct page *parent = NULL;
	int offset[4];
	unsigned int noffset[4];
	nid_t nids[4];
	int level, i = 0;
	int err = 0;

	level = get_node_path(dn->inode, index, offset, noffset);
	if (level < 0)
		return level;

	nids[0] = dn->inode->i_ino;
	npage[0] = dn->inode_page;

	if (!npage[0]) {
		npage[0] = get_node_page(sbi, nids[0]);
		if (IS_ERR(npage[0]))
			return PTR_ERR(npage[0]);
	}

	/* if inline_data is set, should not report any block indices */
	if (f2fs_has_inline_data(dn->inode) && index) {
		err = -ENOENT;
		f2fs_put_page(npage[0], 1);
		goto release_out;
	}

	parent = npage[0];
	if (level != 0)
		nids[1] = get_nid(parent, offset[0], true);
	dn->inode_page = npage[0];
	dn->inode_page_locked = true;

	/* get indirect or direct nodes */
	for (i = 1; i <= level; i++) {
		bool done = false;

		if (!nids[i] && mode == ALLOC_NODE) {
			/* alloc new node */
			if (!alloc_nid(sbi, &(nids[i]))) {
				err = -ENOSPC;
				goto release_pages;
			}

			dn->nid = nids[i];
			npage[i] = new_node_page(dn, noffset[i]);
			if (IS_ERR(npage[i])) {
				alloc_nid_failed(sbi, nids[i]);
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}

			set_nid(parent, offset[i - 1], nids[i], i == 1);
			alloc_nid_done(sbi, nids[i]);
			done = true;
		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
			npage[i] = get_node_page_ra(parent, offset[i - 1]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}
			done = true;
		}
		if (i == 1) {
			dn->inode_page_locked = false;
			unlock_page(parent);
		} else {
			f2fs_put_page(parent, 1);
		}

		if (!done) {
			npage[i] = get_node_page(sbi, nids[i]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				f2fs_put_page(npage[0], 0);
				goto release_out;
			}
		}
		if (i < level) {
			parent = npage[i];
			nids[i + 1] = get_nid(parent, offset[i], false);
		}
	}
	dn->nid = nids[level];
	dn->ofs_in_node = offset[level];
	dn->node_page = npage[level];
	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	return 0;

release_pages:
	f2fs_put_page(parent, 1);
	if (i > 1)
		f2fs_put_page(npage[0], 0);
release_out:
	dn->inode_page = NULL;
	dn->node_page = NULL;
	if (err == -ENOENT) {
		dn->cur_level = i;
		dn->max_level = level;
		dn->ofs_in_node = offset[level];
	}
	return err;
}

static void truncate_node(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info ni;

	get_node_info(sbi, dn->nid, &ni);
	f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);

	/* Deallocate node address */
	invalidate_blocks(sbi, ni.blk_addr);
	dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
	set_node_addr(sbi, &ni, NULL_ADDR, false);

	if (dn->nid == dn->inode->i_ino) {
		remove_orphan_inode(sbi, dn->nid);
		dec_valid_inode_count(sbi);
		f2fs_inode_synced(dn->inode);
	}

	clear_node_page_dirty(dn->node_page);
	set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_put_page(dn->node_page, 1);

	invalidate_mapping_pages(NODE_MAPPING(sbi),
			dn->node_page->index, dn->node_page->index);

	dn->node_page = NULL;
	trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
}

static int truncate_dnode(struct dnode_of_data *dn)
{
	struct page *page;

	if (dn->nid == 0)
		return 1;

	/* get direct node */
	page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
		return 1;
	else if (IS_ERR(page))
		return PTR_ERR(page);

	/* Make dnode_of_data for parameter */
	dn->node_page = page;
	dn->ofs_in_node = 0;
	truncate_data_blocks(dn);
	truncate_node(dn);
	return 1;
}

static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
						int ofs, int depth)
{
	struct dnode_of_data rdn = *dn;
	struct page *page;
	struct f2fs_node *rn;
	nid_t child_nid;
	unsigned int child_nofs;
	int freed = 0;
	int i, ret;

	if (dn->nid == 0)
		return NIDS_PER_BLOCK + 1;

	trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);

	page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	ra_node_pages(page, ofs, NIDS_PER_BLOCK);

	rn = F2FS_NODE(page);
	if (depth < 3) {
		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0)
				continue;
			rdn.nid = child_nid;
			ret = truncate_dnode(&rdn);
			if (ret < 0)
				goto out_err;
			if (set_nid(page, i, 0, false))
				dn->node_changed = true;
		}
	} else {
		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0) {
				child_nofs += NIDS_PER_BLOCK + 1;
				continue;
			}
			rdn.nid = child_nid;
			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
			if (ret == (NIDS_PER_BLOCK + 1)) {
				if (set_nid(page, i, 0, false))
					dn->node_changed = true;
				child_nofs += ret;
			} else if (ret < 0 && ret != -ENOENT) {
				goto out_err;
			}
		}
		freed = child_nofs;
	}

	if (!ofs) {
		/* remove current indirect node */
		dn->node_page = page;
		truncate_node(dn);
		freed++;
	} else {
		f2fs_put_page(page, 1);
	}
	trace_f2fs_truncate_nodes_exit(dn->inode, freed);
	return freed;

out_err:
	f2fs_put_page(page, 1);
	trace_f2fs_truncate_nodes_exit(dn->inode, ret);
	return ret;
}

static int truncate_partial_nodes(struct dnode_of_data *dn,
			struct f2fs_inode *ri, int *offset, int depth)
{
	struct page *pages[2];
	nid_t nid[3];
	nid_t child_nid;
	int err = 0;
	int i;
	int idx = depth - 2;

	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
	if (!nid[0])
		return 0;

	/* get indirect nodes in the path */
	for (i = 0; i < idx + 1; i++) {
		/* reference count'll be increased */
		pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
		if (IS_ERR(pages[i])) {
			err = PTR_ERR(pages[i]);
			idx = i - 1;
			goto fail;
		}
		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
	}

	ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);

	/* free direct nodes linked to a partial indirect node */
	for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
		child_nid = get_nid(pages[idx], i, false);
		if (!child_nid)
			continue;
		dn->nid = child_nid;
		err = truncate_dnode(dn);
		if (err < 0)
			goto fail;
		if (set_nid(pages[idx], i, 0, false))
			dn->node_changed = true;
	}

	if (offset[idx + 1] == 0) {
		dn->node_page = pages[idx];
		dn->nid = nid[idx];
		truncate_node(dn);
	} else {
		f2fs_put_page(pages[idx], 1);
	}
	offset[idx]++;
	offset[idx + 1] = 0;
	idx--;
fail:
	for (i = idx; i >= 0; i--)
		f2fs_put_page(pages[i], 1);

	trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);

	return err;
}

/*
 * All the block addresses of data and nodes should be nullified.
 */
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int err = 0, cont = 1;
	int level, offset[4], noffset[4];
	unsigned int nofs = 0;
	struct f2fs_inode *ri;
	struct dnode_of_data dn;
	struct page *page;

	trace_f2fs_truncate_inode_blocks_enter(inode, from);

	level = get_node_path(inode, from, offset, noffset);
	if (level < 0)
		return level;

	page = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	set_new_dnode(&dn, inode, page, NULL, 0);
	unlock_page(page);

	ri = F2FS_INODE(page);
	switch (level) {
	case 0:
	case 1:
		nofs = noffset[1];
		break;
	case 2:
		nofs = noffset[1];
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		nofs += 1 + NIDS_PER_BLOCK;
		break;
	case 3:
		nofs = 5 + 2 * NIDS_PER_BLOCK;
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		break;
	default:
		BUG();
	}

skip_partial:
	while (cont) {
		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
		switch (offset[0]) {
		case NODE_DIR1_BLOCK:
		case NODE_DIR2_BLOCK:
			err = truncate_dnode(&dn);
			break;

		case NODE_IND1_BLOCK:
		case NODE_IND2_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 2);
			break;

		case NODE_DIND_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 3);
			cont = 0;
			break;

		default:
			BUG();
		}
		if (err < 0 && err != -ENOENT)
			goto fail;
		if (offset[1] == 0 &&
				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
			lock_page(page);
			BUG_ON(page->mapping != NODE_MAPPING(sbi));
			f2fs_wait_on_page_writeback(page, NODE, true);
			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
			set_page_dirty(page);
			unlock_page(page);
		}
		offset[1] = 0;
		offset[0]++;
		nofs += err;
	}
fail:
	f2fs_put_page(page, 0);
	trace_f2fs_truncate_inode_blocks_exit(inode, err);
	return err > 0 ? 0 : err;
}

/* caller must lock inode page */
int truncate_xattr_node(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t nid = F2FS_I(inode)->i_xattr_nid;
	struct dnode_of_data dn;
	struct page *npage;

	if (!nid)
		return 0;

	npage = get_node_page(sbi, nid);
	if (IS_ERR(npage))
		return PTR_ERR(npage);

	f2fs_i_xnid_write(inode, 0);

	set_new_dnode(&dn, inode, NULL, npage, nid);
	truncate_node(&dn);
	return 0;
}

/*
 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 */
int remove_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
	if (err)
		return err;

	err = truncate_xattr_node(inode);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	/* remove potential inline_data blocks */
	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
				S_ISLNK(inode->i_mode))
		truncate_data_blocks_range(&dn, 1);

	/* 0 is possible, after f2fs_new_inode() has failed */
	f2fs_bug_on(F2FS_I_SB(inode),
			inode->i_blocks != 0 && inode->i_blocks != 8);

	/* will put inode & node pages */
	truncate_node(&dn);
	return 0;
}

struct page *new_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;

	/* allocate inode page for new inode */
	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);

	/* caller should f2fs_put_page(page, 1); */
	return new_node_page(&dn, 0);
}

struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info new_ni;
	struct page *page;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return ERR_PTR(-EPERM);

	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
		goto fail;

#ifdef CONFIG_F2FS_CHECK_FS
	get_node_info(sbi, dn->nid, &new_ni);
	f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
	new_ni.nid = dn->nid;
	new_ni.ino = dn->inode->i_ino;
	new_ni.blk_addr = NULL_ADDR;
	new_ni.flag = 0;
	new_ni.version = 0;
	set_node_addr(sbi, &new_ni, NEW_ADDR, false);

	f2fs_wait_on_page_writeback(page, NODE, true);
	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
	set_cold_node(dn->inode, page);
	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (set_page_dirty(page))
		dn->node_changed = true;

	if (f2fs_has_xattr_block(ofs))
		f2fs_i_xnid_write(dn->inode, dn->nid);

	if (ofs == 0)
		inc_valid_inode_count(sbi);
	return page;

fail:
	clear_node_page_dirty(page);
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

/*
 * Caller should do after getting the following values.
 * 0: f2fs_put_page(page, 0)
 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
 */
static int read_node_page(struct page *page, int op_flags)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = NODE,
		.op = REQ_OP_READ,
		.op_flags = op_flags,
		.page = page,
		.encrypted_page = NULL,
	};

	if (PageUptodate(page))
		return LOCKED_PAGE;

	get_node_info(sbi, page->index, &ni);

	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		ClearPageUptodate(page);
		return -ENOENT;
	}

	fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
	return f2fs_submit_page_bio(&fio);
}

/*
 * Readahead a node page
 */
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *apage;
	int err;

	if (!nid)
		return;
	f2fs_bug_on(sbi, check_nid_range(sbi, nid));

	rcu_read_lock();
	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
	rcu_read_unlock();
	if (apage)
		return;

	apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!apage)
		return;

	err = read_node_page(apage, REQ_RAHEAD);
	f2fs_put_page(apage, err ? 1 : 0);
}

static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
					struct page *parent, int start)
{
	struct page *page;
	int err;

	if (!nid)
		return ERR_PTR(-ENOENT);
	f2fs_bug_on(sbi, check_nid_range(sbi, nid));
repeat:
	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, 0);
	if (err < 0) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	} else if (err == LOCKED_PAGE) {
		err = 0;
		goto page_hit;
	}

	if (parent)
		ra_node_pages(parent, start + 1, MAX_RA_NODE);

	lock_page(page);

	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	if (unlikely(!PageUptodate(page))) {
		err = -EIO;
		goto out_err;
	}

	if (!f2fs_inode_chksum_verify(sbi, page)) {
		err = -EBADMSG;
		goto out_err;
	}
page_hit:
	if (unlikely(nid != nid_of_node(page))) {
		f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
			"nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
			nid, nid_of_node(page), ino_of_node(page),
			ofs_of_node(page), cpver_of_node(page),
			next_blkaddr_of_node(page));
		err = -EINVAL;
out_err:
		ClearPageUptodate(page);
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	return page;
}

struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
	return __get_node_page(sbi, nid, NULL, 0);
}

struct page *get_node_page_ra(struct page *parent, int start)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	nid_t nid = get_nid(parent, start, false);

	return __get_node_page(sbi, nid, parent, start);
}

static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct page *page;
	int ret;

	/* should flush inline_data before evict_inode */
	inode = ilookup(sbi->sb, ino);
	if (!inode)
		return;

	page = f2fs_pagecache_get_page(inode->i_mapping, 0,
					FGP_LOCK|FGP_NOWAIT, 0);
	if (!page)
		goto iput_out;

	if (!PageUptodate(page))
		goto page_out;

	if (!PageDirty(page))
		goto page_out;

	if (!clear_page_dirty_for_io(page))
		goto page_out;

	ret = f2fs_write_inline_data(inode, page);
	inode_dec_dirty_pages(inode);
	remove_dirty_inode(inode);
	if (ret)
		set_page_dirty(page);
page_out:
	f2fs_put_page(page, 1);
iput_out:
	iput(inode);
}

static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
	pgoff_t index;
	struct pagevec pvec;
	struct page *last_page = NULL;
	int nr_pages;

	pagevec_init(&pvec);
	index = 0;

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				return ERR_PTR(-EIO);
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (last_page)
				f2fs_put_page(last_page, 0);

			get_page(page);
			last_page = page;
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return last_page;
}

static int __write_node_page(struct page *page, bool atomic, bool *submitted,
				struct writeback_control *wbc, bool do_balance,
				enum iostat_type io_type)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	nid_t nid;
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = ino_of_node(page),
		.type = NODE,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.io_type = io_type,
	};

	trace_f2fs_writepage(page, NODE);

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;

	/* get old block addr of this node page */
	nid = nid_of_node(page);
	f2fs_bug_on(sbi, page->index != nid);

	if (wbc->for_reclaim) {
		if (!down_read_trylock(&sbi->node_write))
			goto redirty_out;
	} else {
		down_read(&sbi->node_write);
	}

	get_node_info(sbi, nid, &ni);

	/* This page is already truncated */
	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		ClearPageUptodate(page);
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		up_read(&sbi->node_write);
		unlock_page(page);
		return 0;
	}

	if (atomic && !test_opt(sbi, NOBARRIER))
		fio.op_flags |= REQ_PREFLUSH | REQ_FUA;

	set_page_writeback(page);
	fio.old_blkaddr = ni.blk_addr;
	write_node_page(nid, &fio);
	set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
	dec_page_count(sbi, F2FS_DIRTY_NODES);
	up_read(&sbi->node_write);

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
						page->index, NODE);
		submitted = NULL;
	}

	unlock_page(page);
	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, NODE);
		submitted = NULL;
	}
	if (submitted)
		*submitted = fio.submitted;

	if (do_balance)
		f2fs_balance_fs(sbi, false);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

void move_node_page(struct page *node_page, int gc_type)
{
	if (gc_type == FG_GC) {
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = 1,
			.for_reclaim = 0,
		};

		set_page_dirty(node_page);
		f2fs_wait_on_page_writeback(node_page, NODE, true);

		f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
		if (!clear_page_dirty_for_io(node_page))
			goto out_page;

		if (__write_node_page(node_page, false, NULL,
					&wbc, false, FS_GC_NODE_IO))
			unlock_page(node_page);
		goto release_page;
	} else {
		/* set page dirty and write it */
		if (!PageWriteback(node_page))
			set_page_dirty(node_page);
	}
out_page:
	unlock_page(node_page);
release_page:
	f2fs_put_page(node_page, 0);
}

static int f2fs_write_node_page(struct page *page,
				struct writeback_control *wbc)
{
	return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
}

int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
			struct writeback_control *wbc, bool atomic)
{
	pgoff_t index;
	pgoff_t last_idx = ULONG_MAX;
	struct pagevec pvec;
	int ret = 0;
	struct page *last_page = NULL;
	bool marked = false;
	nid_t ino = inode->i_ino;
	int nr_pages;

	if (atomic) {
		last_page = last_fsync_dnode(sbi, ino);
		if (IS_ERR_OR_NULL(last_page))
			return PTR_ERR_OR_ZERO(last_page);
	}
retry:
	pagevec_init(&pvec);
	index = 0;

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				ret = -EIO;
				goto out;
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page) && page != last_page) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, NODE, true);
			BUG_ON(PageWriteback(page));

			set_fsync_mark(page, 0);
			set_dentry_mark(page, 0);

			if (!atomic || page == last_page) {
				set_fsync_mark(page, 1);
				if (IS_INODE(page)) {
					if (is_inode_flag_set(inode,
								FI_DIRTY_INODE))
						update_inode(inode, page);
					set_dentry_mark(page,
						need_dentry_mark(sbi, ino));
				}
				/* may be written by other thread */
				if (!PageDirty(page))
					set_page_dirty(page);
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_node_page(page, atomic &&
						page == last_page,
						&submitted, wbc, true,
						FS_NODE_IO);
			if (ret) {
				unlock_page(page);
				f2fs_put_page(last_page, 0);
				break;
			} else if (submitted) {
				last_idx = page->index;
			}

			if (page == last_page) {
				f2fs_put_page(page, 0);
				marked = true;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();

		if (ret || marked)
			break;
	}
	if (!ret && atomic && !marked) {
		f2fs_msg(sbi->sb, KERN_DEBUG,
			"Retry to write fsync mark: ino=%u, idx=%lx",
					ino, last_page->index);
		lock_page(last_page);
		f2fs_wait_on_page_writeback(last_page, NODE, true);
		set_page_dirty(last_page);
		unlock_page(last_page);
		goto retry;
	}
out:
	if (last_idx != ULONG_MAX)
		f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
	return ret ? -EIO : 0;
}

int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
				bool do_balance, enum iostat_type io_type)
{
	pgoff_t index;
	struct pagevec pvec;
	int step = 0;
	int nwritten = 0;
	int ret = 0;
	int nr_pages;

	pagevec_init(&pvec);

next_step:
	index = 0;

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			if (unlikely(f2fs_cp_error(sbi))) {
				pagevec_release(&pvec);
				ret = -EIO;
				goto out;
			}

			/*
			 * flushing sequence with step:
			 * 0. indirect nodes
			 * 1. dentry dnodes
			 * 2. file dnodes
			 */
			if (step == 0 && IS_DNODE(page))
				continue;
			if (step == 1 && (!IS_DNODE(page) ||
						is_cold_node(page)))
				continue;
			if (step == 2 && (!IS_DNODE(page) ||
						!is_cold_node(page)))
				continue;
lock_node:
			if (!trylock_page(page))
				continue;

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* flush inline_data */
			if (is_inline_node(page)) {
				clear_inline_node(page);
				unlock_page(page);
				flush_inline_data(sbi, ino_of_node(page));
				goto lock_node;
			}

			f2fs_wait_on_page_writeback(page, NODE, true);

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			set_fsync_mark(page, 0);
			set_dentry_mark(page, 0);

			ret = __write_node_page(page, false, &submitted,
						wbc, do_balance, io_type);
			if (ret)
				unlock_page(page);
			else if (submitted)
				nwritten++;

			if (--wbc->nr_to_write == 0)
				break;
		}
		pagevec_release(&pvec);
		cond_resched();

		if (wbc->nr_to_write == 0) {
			step = 2;
			break;
		}
	}

	if (step < 2) {
		step++;
		goto next_step;
	}
out:
	if (nwritten)
		f2fs_submit_merged_write(sbi, NODE);
	return ret;
}

int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
{
	pgoff_t index = 0;
	struct pagevec pvec;
	int ret2, ret = 0;
	int nr_pages;

	pagevec_init(&pvec);

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_WRITEBACK))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (ino && ino_of_node(page) == ino) {
				f2fs_wait_on_page_writeback(page, NODE, true);
				if (TestClearPageError(page))
					ret = -EIO;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	ret2 = filemap_check_errors(NODE_MAPPING(sbi));
	if (!ret)
		ret = ret2;
	return ret;
}

static int f2fs_write_node_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct blk_plug plug;
	long diff;

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	/* balancing f2fs's metadata in background */
	f2fs_balance_fs_bg(sbi);

	/* collect a number of dirty node pages and write together */
	if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, NODE);

	diff = nr_pages_to_write(sbi, NODE, wbc);
	wbc->sync_mode = WB_SYNC_NONE;
	blk_start_plug(&plug);
	sync_node_pages(sbi, wbc, true, FS_NODE_IO);
	blk_finish_plug(&plug);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
	trace_f2fs_writepages(mapping->host, wbc, NODE);
	return 0;
}

static int f2fs_set_node_page_dirty(struct page *page)
{
	trace_f2fs_set_page_dirty(page, NODE);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (!PageDirty(page)) {
		f2fs_set_page_dirty_nobuffers(page);
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
		SetPagePrivate(page);
		f2fs_trace_pid(page);
		return 1;
	}
	return 0;
}

/*
 * Structure of the f2fs node operations
 */
const struct address_space_operations f2fs_node_aops = {
	.writepage	= f2fs_write_node_page,
	.writepages	= f2fs_write_node_pages,
	.set_page_dirty	= f2fs_set_node_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
#ifdef CONFIG_MIGRATION
	.migratepage	= f2fs_migrate_page,
#endif
};

static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
						nid_t n)
{
	return radix_tree_lookup(&nm_i->free_nid_root, n);
}

static int __insert_free_nid(struct f2fs_sb_info *sbi,
			struct free_nid *i, enum nid_state state)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
	if (err)
		return err;

	f2fs_bug_on(sbi, state != i->state);
	nm_i->nid_cnt[state]++;
	if (state == FREE_NID)
		list_add_tail(&i->list, &nm_i->free_nid_list);
	return 0;
}

static void __remove_free_nid(struct f2fs_sb_info *sbi,
			struct free_nid *i, enum nid_state state)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	f2fs_bug_on(sbi, state != i->state);
	nm_i->nid_cnt[state]--;
	if (state == FREE_NID)
		list_del(&i->list);
	radix_tree_delete(&nm_i->free_nid_root, i->nid);
}

static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
			enum nid_state org_state, enum nid_state dst_state)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	f2fs_bug_on(sbi, org_state != i->state);
	i->state = dst_state;
	nm_i->nid_cnt[org_state]--;
	nm_i->nid_cnt[dst_state]++;

	switch (dst_state) {
	case PREALLOC_NID:
		list_del(&i->list);
		break;
	case FREE_NID:
		list_add_tail(&i->list, &nm_i->free_nid_list);
		break;
	default:
		BUG_ON(1);
	}
}

/* return if the nid is recognized as free */
static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *e;
	struct nat_entry *ne;
	int err = -EINVAL;
	bool ret = false;

	/* 0 nid should not be used */
	if (unlikely(nid == 0))
		return false;

	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
	i->nid = nid;
	i->state = FREE_NID;

	if (radix_tree_preload(GFP_NOFS))
		goto err;

	spin_lock(&nm_i->nid_list_lock);

	if (build) {
		/*
		 *   Thread A             Thread B
		 *  - f2fs_create
		 *   - f2fs_new_inode
		 *    - alloc_nid
		 *     - __insert_nid_to_list(PREALLOC_NID)
		 *                     - f2fs_balance_fs_bg
		 *                      - build_free_nids
		 *                       - __build_free_nids
		 *                        - scan_nat_page
		 *                         - add_free_nid
		 *                          - __lookup_nat_cache
		 *  - f2fs_add_link
		 *   - init_inode_metadata
		 *    - new_inode_page
		 *     - new_node_page
		 *      - set_node_addr
		 *  - alloc_nid_done
		 *   - __remove_nid_from_list(PREALLOC_NID)
		 *                         - __insert_nid_to_list(FREE_NID)
		 */
		ne = __lookup_nat_cache(nm_i, nid);
		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
				nat_get_blkaddr(ne) != NULL_ADDR))
			goto err_out;

		e = __lookup_free_nid_list(nm_i, nid);
		if (e) {
			if (e->state == FREE_NID)
				ret = true;
			goto err_out;
		}
	}
	ret = true;
	err = __insert_free_nid(sbi, i, FREE_NID);
err_out:
	spin_unlock(&nm_i->nid_list_lock);
	radix_tree_preload_end();
err:
	if (err)
		kmem_cache_free(free_nid_slab, i);
	return ret;
}

static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
	bool need_free = false;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	if (i && i->state == FREE_NID) {
		__remove_free_nid(sbi, i, FREE_NID);
		need_free = true;
	}
	spin_unlock(&nm_i->nid_list_lock);

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
}

static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
							bool set, bool build)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
	unsigned int nid_ofs = nid - START_NID(nid);

	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
		return;

	if (set) {
		if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
			return;
		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
		nm_i->free_nid_count[nat_ofs]++;
	} else {
		if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
			return;
		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
		if (!build)
			nm_i->free_nid_count[nat_ofs]--;
	}
}

static void scan_nat_page(struct f2fs_sb_info *sbi,
			struct page *nat_page, nid_t start_nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct f2fs_nat_block *nat_blk = page_address(nat_page);
	block_t blk_addr;
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
	int i;

	if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
		return;

	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);

	i = start_nid % NAT_ENTRY_PER_BLOCK;

	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
		bool freed = false;

		if (unlikely(start_nid >= nm_i->max_nid))
			break;

		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
		if (blk_addr == NULL_ADDR)
			freed = add_free_nid(sbi, start_nid, true);
		spin_lock(&NM_I(sbi)->nid_list_lock);
		update_free_nid_bitmap(sbi, start_nid, freed, true);
		spin_unlock(&NM_I(sbi)->nid_list_lock);
	}
}

static void scan_curseg_cache(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_read(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		block_t addr;
		nid_t nid;

		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
		nid = le32_to_cpu(nid_in_journal(journal, i));
		if (addr == NULL_ADDR)
			add_free_nid(sbi, nid, true);
		else
			remove_free_nid(sbi, nid);
	}
	up_read(&curseg->journal_rwsem);
}

static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int i, idx;
	nid_t nid;

	down_read(&nm_i->nat_tree_lock);

	for (i = 0; i < nm_i->nat_blocks; i++) {
		if (!test_bit_le(i, nm_i->nat_block_bitmap))
			continue;
		if (!nm_i->free_nid_count[i])
			continue;
		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
			idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
						NAT_ENTRY_PER_BLOCK, idx);
			if (idx >= NAT_ENTRY_PER_BLOCK)
				break;

			nid = i * NAT_ENTRY_PER_BLOCK + idx;
			add_free_nid(sbi, nid, true);

			if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
				goto out;
		}
	}
out:
	scan_curseg_cache(sbi);

	up_read(&nm_i->nat_tree_lock);
}

static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int i = 0;
	nid_t nid = nm_i->next_scan_nid;

	if (unlikely(nid >= nm_i->max_nid))
		nid = 0;

	/* Enough entries */
	if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
		return;

	if (!sync && !available_free_memory(sbi, FREE_NIDS))
		return;

	if (!mount) {
		/* try to find free nids in free_nid_bitmap */
		scan_free_nid_bits(sbi);

		if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
			return;
	}

	/* readahead nat pages to be scanned */
	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
							META_NAT, true);

	down_read(&nm_i->nat_tree_lock);

	while (1) {
		struct page *page = get_current_nat_page(sbi, nid);

		scan_nat_page(sbi, page, nid);
		f2fs_put_page(page, 1);

		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
		if (unlikely(nid >= nm_i->max_nid))
			nid = 0;

		if (++i >= FREE_NID_PAGES)
			break;
	}

	/* go to the next free nat pages to find free nids abundantly */
	nm_i->next_scan_nid = nid;

	/* find free nids from current sum_pages */
	scan_curseg_cache(sbi);

	up_read(&nm_i->nat_tree_lock);

	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
					nm_i->ra_nid_pages, META_NAT, false);
}
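/*
 * Roughly speaking, build_free_nids() below just serializes
 * __build_free_nids() under build_lock; on_build_free_nids(), defined at the
 * top of this file, tests that same lock so alloc_nid() avoids picking up
 * free nids while a scan is still in flight.
 */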
void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
	mutex_lock(&NM_I(sbi)->build_lock);
	__build_free_nids(sbi, sync, mount);
	mutex_unlock(&NM_I(sbi)->build_lock);
}

/*
 * If this function returns success, caller can obtain a new nid
 * from second parameter of this function.
 * The returned nid could be used as an ino as well as a nid when inode is
 * created.
 */
bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i = NULL;
retry:
#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
		f2fs_show_injection_info(FAULT_ALLOC_NID);
		return false;
	}
#endif
	spin_lock(&nm_i->nid_list_lock);

	if (unlikely(nm_i->available_nids == 0)) {
		spin_unlock(&nm_i->nid_list_lock);
		return false;
	}

	/* We should not use stale free nids created by build_free_nids */
	if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) {
		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
		i = list_first_entry(&nm_i->free_nid_list,
					struct free_nid, list);
		*nid = i->nid;

		__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
		nm_i->available_nids--;

		update_free_nid_bitmap(sbi, *nid, false, false);

		spin_unlock(&nm_i->nid_list_lock);
		return true;
	}
	spin_unlock(&nm_i->nid_list_lock);

	/* Let's scan nat pages and its caches to get free nids */
	build_free_nids(sbi, true, false);
	goto retry;
}

/*
 * alloc_nid() should be called prior to this function.
 */
void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	f2fs_bug_on(sbi, !i);
	__remove_free_nid(sbi, i, PREALLOC_NID);
	spin_unlock(&nm_i->nid_list_lock);

	kmem_cache_free(free_nid_slab, i);
}
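/*
 * Free nid lifecycle, roughly: alloc_nid() moves a nid from FREE_NID to
 * PREALLOC_NID; the caller then finishes with either alloc_nid_done() above
 * (the nid was used, so drop its free_nid entry) or alloc_nid_failed() below
 * (the nid was not used, so return it to FREE_NID, or free the entry when
 * memory is tight).
 */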
/*
 * alloc_nid() should be called prior to this function.
 */
void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
	bool need_free = false;

	if (!nid)
		return;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	f2fs_bug_on(sbi, !i);

	if (!available_free_memory(sbi, FREE_NIDS)) {
		__remove_free_nid(sbi, i, PREALLOC_NID);
		need_free = true;
	} else {
		__move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
	}

	nm_i->available_nids++;

	update_free_nid_bitmap(sbi, nid, true, false);

	spin_unlock(&nm_i->nid_list_lock);

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
}

int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next;
	int nr = nr_shrink;

	if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
		return 0;

	if (!mutex_trylock(&nm_i->build_lock))
		return 0;

	spin_lock(&nm_i->nid_list_lock);
	list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
		if (nr_shrink <= 0 ||
				nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
			break;

		__remove_free_nid(sbi, i, FREE_NID);
		kmem_cache_free(free_nid_slab, i);
		nr_shrink--;
	}
	spin_unlock(&nm_i->nid_list_lock);
	mutex_unlock(&nm_i->build_lock);

	return nr - nr_shrink;
}

void recover_inline_xattr(struct inode *inode, struct page *page)
{
	void *src_addr, *dst_addr;
	size_t inline_size;
	struct page *ipage;
	struct f2fs_inode *ri;

	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
	f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));

	ri = F2FS_INODE(page);
	if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
		clear_inode_flag(inode, FI_INLINE_XATTR);
		goto update_inode;
	}

	dst_addr = inline_xattr_addr(inode, ipage);
	src_addr = inline_xattr_addr(inode, page);
	inline_size = inline_xattr_size(inode);

	f2fs_wait_on_page_writeback(ipage, NODE, true);
	memcpy(dst_addr, src_addr, inline_size);
update_inode:
	update_inode(inode, ipage);
	f2fs_put_page(ipage, 1);
}

int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
	nid_t new_xnid;
	struct dnode_of_data dn;
	struct node_info ni;
	struct page *xpage;

	if (!prev_xnid)
		goto recover_xnid;

	/* 1: invalidate the previous xattr nid */
	get_node_info(sbi, prev_xnid, &ni);
	f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
	invalidate_blocks(sbi, ni.blk_addr);
	dec_valid_node_count(sbi, inode, false);
	set_node_addr(sbi, &ni, NULL_ADDR, false);

recover_xnid:
	/* 2: update xattr nid in inode */
	if (!alloc_nid(sbi, &new_xnid))
		return -ENOSPC;

	set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
	xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
	if (IS_ERR(xpage)) {
		alloc_nid_failed(sbi, new_xnid);
		return PTR_ERR(xpage);
	}

	alloc_nid_done(sbi, new_xnid);
	update_inode_page(inode);

	/* 3: update and set xattr node page dirty */
	memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);

	set_page_dirty(xpage);
	f2fs_put_page(xpage, 1);

	return 0;
}
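/*
 * Roughly: during roll-forward recovery, rebuild an in-memory inode page for
 * the inode that owns @page: grab a fresh node page, copy the fixed part of
 * the on-disk inode (everything before i_ext), and reset size/blocks/links
 * so later recovery steps can reconstruct them.
 */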
recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 2266 { 2267 struct f2fs_inode *src, *dst; 2268 nid_t ino = ino_of_node(page); 2269 struct node_info old_ni, new_ni; 2270 struct page *ipage; 2271 2272 get_node_info(sbi, ino, &old_ni); 2273 2274 if (unlikely(old_ni.blk_addr != NULL_ADDR)) 2275 return -EINVAL; 2276 retry: 2277 ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); 2278 if (!ipage) { 2279 congestion_wait(BLK_RW_ASYNC, HZ/50); 2280 goto retry; 2281 } 2282 2283 /* Should not use this inode from free nid list */ 2284 remove_free_nid(sbi, ino); 2285 2286 if (!PageUptodate(ipage)) 2287 SetPageUptodate(ipage); 2288 fill_node_footer(ipage, ino, ino, 0, true); 2289 2290 src = F2FS_INODE(page); 2291 dst = F2FS_INODE(ipage); 2292 2293 memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src); 2294 dst->i_size = 0; 2295 dst->i_blocks = cpu_to_le64(1); 2296 dst->i_links = cpu_to_le32(1); 2297 dst->i_xattr_nid = 0; 2298 dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); 2299 if (dst->i_inline & F2FS_EXTRA_ATTR) { 2300 dst->i_extra_isize = src->i_extra_isize; 2301 2302 if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && 2303 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), 2304 i_inline_xattr_size)) 2305 dst->i_inline_xattr_size = src->i_inline_xattr_size; 2306 2307 if (f2fs_sb_has_project_quota(sbi->sb) && 2308 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), 2309 i_projid)) 2310 dst->i_projid = src->i_projid; 2311 } 2312 2313 new_ni = old_ni; 2314 new_ni.ino = ino; 2315 2316 if (unlikely(inc_valid_node_count(sbi, NULL, true))) 2317 WARN_ON(1); 2318 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 2319 inc_valid_inode_count(sbi); 2320 set_page_dirty(ipage); 2321 f2fs_put_page(ipage, 1); 2322 return 0; 2323 } 2324 2325 int restore_node_summary(struct f2fs_sb_info *sbi, 2326 unsigned int segno, struct f2fs_summary_block *sum) 2327 { 2328 struct f2fs_node *rn; 2329 struct f2fs_summary *sum_entry; 2330 block_t addr; 2331 int i, idx, last_offset, nrpages; 2332 2333 /* scan the node segment */ 2334 last_offset = sbi->blocks_per_seg; 2335 addr = START_BLOCK(sbi, segno); 2336 sum_entry = &sum->entries[0]; 2337 2338 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 2339 nrpages = min(last_offset - i, BIO_MAX_PAGES); 2340 2341 /* readahead node pages */ 2342 ra_meta_pages(sbi, addr, nrpages, META_POR, true); 2343 2344 for (idx = addr; idx < addr + nrpages; idx++) { 2345 struct page *page = get_tmp_page(sbi, idx); 2346 2347 rn = F2FS_NODE(page); 2348 sum_entry->nid = rn->footer.nid; 2349 sum_entry->version = 0; 2350 sum_entry->ofs_in_node = 0; 2351 sum_entry++; 2352 f2fs_put_page(page, 1); 2353 } 2354 2355 invalidate_mapping_pages(META_MAPPING(sbi), addr, 2356 addr + nrpages); 2357 } 2358 return 0; 2359 } 2360 2361 static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 2362 { 2363 struct f2fs_nm_info *nm_i = NM_I(sbi); 2364 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2365 struct f2fs_journal *journal = curseg->journal; 2366 int i; 2367 2368 down_write(&curseg->journal_rwsem); 2369 for (i = 0; i < nats_in_cursum(journal); i++) { 2370 struct nat_entry *ne; 2371 struct f2fs_nat_entry raw_ne; 2372 nid_t nid = le32_to_cpu(nid_in_journal(journal, i)); 2373 2374 raw_ne = nat_in_journal(journal, i); 2375 2376 ne = __lookup_nat_cache(nm_i, nid); 2377 if (!ne) { 2378 ne = __alloc_nat_entry(nid, true); 2379 __init_nat_entry(nm_i, ne, &raw_ne, true); 2380 } 2381 2382 /* 2383 * if a free nat in journal has not been used 
after last 2384 * checkpoint, we should remove it from available nids, 2385 * since later we will add it again. 2386 */ 2387 if (!get_nat_flag(ne, IS_DIRTY) && 2388 le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) { 2389 spin_lock(&nm_i->nid_list_lock); 2390 nm_i->available_nids--; 2391 spin_unlock(&nm_i->nid_list_lock); 2392 } 2393 2394 __set_nat_cache_dirty(nm_i, ne); 2395 } 2396 update_nats_in_cursum(journal, -i); 2397 up_write(&curseg->journal_rwsem); 2398 } 2399 2400 static void __adjust_nat_entry_set(struct nat_entry_set *nes, 2401 struct list_head *head, int max) 2402 { 2403 struct nat_entry_set *cur; 2404 2405 if (nes->entry_cnt >= max) 2406 goto add_out; 2407 2408 list_for_each_entry(cur, head, set_list) { 2409 if (cur->entry_cnt >= nes->entry_cnt) { 2410 list_add(&nes->set_list, cur->set_list.prev); 2411 return; 2412 } 2413 } 2414 add_out: 2415 list_add_tail(&nes->set_list, head); 2416 } 2417 2418 static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, 2419 struct page *page) 2420 { 2421 struct f2fs_nm_info *nm_i = NM_I(sbi); 2422 unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; 2423 struct f2fs_nat_block *nat_blk = page_address(page); 2424 int valid = 0; 2425 int i = 0; 2426 2427 if (!enabled_nat_bits(sbi, NULL)) 2428 return; 2429 2430 if (nat_index == 0) { 2431 valid = 1; 2432 i = 1; 2433 } 2434 for (; i < NAT_ENTRY_PER_BLOCK; i++) { 2435 if (nat_blk->entries[i].block_addr != NULL_ADDR) 2436 valid++; 2437 } 2438 if (valid == 0) { 2439 __set_bit_le(nat_index, nm_i->empty_nat_bits); 2440 __clear_bit_le(nat_index, nm_i->full_nat_bits); 2441 return; 2442 } 2443 2444 __clear_bit_le(nat_index, nm_i->empty_nat_bits); 2445 if (valid == NAT_ENTRY_PER_BLOCK) 2446 __set_bit_le(nat_index, nm_i->full_nat_bits); 2447 else 2448 __clear_bit_le(nat_index, nm_i->full_nat_bits); 2449 } 2450 2451 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, 2452 struct nat_entry_set *set, struct cp_control *cpc) 2453 { 2454 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2455 struct f2fs_journal *journal = curseg->journal; 2456 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; 2457 bool to_journal = true; 2458 struct f2fs_nat_block *nat_blk; 2459 struct nat_entry *ne, *cur; 2460 struct page *page = NULL; 2461 2462 /* 2463 * there are two steps to flush nat entries: 2464 * #1, flush nat entries to journal in current hot data summary block. 2465 * #2, flush nat entries to nat page. 
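 *
 * (The choice is made just below: when nat_bits are enabled for this
 *  checkpoint, or when the journal cannot hold set->entry_cnt more
 *  entries, the set is flushed to its NAT page so that
 *  __update_nat_bits() can refresh the full/empty NAT block bitmaps.)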
2466 */ 2467 if (enabled_nat_bits(sbi, cpc) || 2468 !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) 2469 to_journal = false; 2470 2471 if (to_journal) { 2472 down_write(&curseg->journal_rwsem); 2473 } else { 2474 page = get_next_nat_page(sbi, start_nid); 2475 nat_blk = page_address(page); 2476 f2fs_bug_on(sbi, !nat_blk); 2477 } 2478 2479 /* flush dirty nats in nat entry set */ 2480 list_for_each_entry_safe(ne, cur, &set->entry_list, list) { 2481 struct f2fs_nat_entry *raw_ne; 2482 nid_t nid = nat_get_nid(ne); 2483 int offset; 2484 2485 f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR); 2486 2487 if (to_journal) { 2488 offset = lookup_journal_in_cursum(journal, 2489 NAT_JOURNAL, nid, 1); 2490 f2fs_bug_on(sbi, offset < 0); 2491 raw_ne = &nat_in_journal(journal, offset); 2492 nid_in_journal(journal, offset) = cpu_to_le32(nid); 2493 } else { 2494 raw_ne = &nat_blk->entries[nid - start_nid]; 2495 } 2496 raw_nat_from_node_info(raw_ne, &ne->ni); 2497 nat_reset_flag(ne); 2498 __clear_nat_cache_dirty(NM_I(sbi), set, ne); 2499 if (nat_get_blkaddr(ne) == NULL_ADDR) { 2500 add_free_nid(sbi, nid, false); 2501 spin_lock(&NM_I(sbi)->nid_list_lock); 2502 NM_I(sbi)->available_nids++; 2503 update_free_nid_bitmap(sbi, nid, true, false); 2504 spin_unlock(&NM_I(sbi)->nid_list_lock); 2505 } else { 2506 spin_lock(&NM_I(sbi)->nid_list_lock); 2507 update_free_nid_bitmap(sbi, nid, false, false); 2508 spin_unlock(&NM_I(sbi)->nid_list_lock); 2509 } 2510 } 2511 2512 if (to_journal) { 2513 up_write(&curseg->journal_rwsem); 2514 } else { 2515 __update_nat_bits(sbi, start_nid, page); 2516 f2fs_put_page(page, 1); 2517 } 2518 2519 /* Allow dirty nats by node block allocation in write_begin */ 2520 if (!set->entry_cnt) { 2521 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 2522 kmem_cache_free(nat_entry_set_slab, set); 2523 } 2524 } 2525 2526 /* 2527 * This function is called during the checkpointing process. 2528 */ 2529 void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) 2530 { 2531 struct f2fs_nm_info *nm_i = NM_I(sbi); 2532 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2533 struct f2fs_journal *journal = curseg->journal; 2534 struct nat_entry_set *setvec[SETVEC_SIZE]; 2535 struct nat_entry_set *set, *tmp; 2536 unsigned int found; 2537 nid_t set_idx = 0; 2538 LIST_HEAD(sets); 2539 2540 if (!nm_i->dirty_nat_cnt) 2541 return; 2542 2543 down_write(&nm_i->nat_tree_lock); 2544 2545 /* 2546 * if there are no enough space in journal to store dirty nat 2547 * entries, remove all entries from journal and merge them 2548 * into nat entry set. 
2549 */ 2550 if (enabled_nat_bits(sbi, cpc) || 2551 !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 2552 remove_nats_in_journal(sbi); 2553 2554 while ((found = __gang_lookup_nat_set(nm_i, 2555 set_idx, SETVEC_SIZE, setvec))) { 2556 unsigned idx; 2557 set_idx = setvec[found - 1]->set + 1; 2558 for (idx = 0; idx < found; idx++) 2559 __adjust_nat_entry_set(setvec[idx], &sets, 2560 MAX_NAT_JENTRIES(journal)); 2561 } 2562 2563 /* flush dirty nats in nat entry set */ 2564 list_for_each_entry_safe(set, tmp, &sets, set_list) 2565 __flush_nat_entry_set(sbi, set, cpc); 2566 2567 up_write(&nm_i->nat_tree_lock); 2568 /* Allow dirty nats by node block allocation in write_begin */ 2569 } 2570 2571 static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) 2572 { 2573 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 2574 struct f2fs_nm_info *nm_i = NM_I(sbi); 2575 unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE; 2576 unsigned int i; 2577 __u64 cp_ver = cur_cp_version(ckpt); 2578 block_t nat_bits_addr; 2579 2580 if (!enabled_nat_bits(sbi, NULL)) 2581 return 0; 2582 2583 nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + 2584 F2FS_BLKSIZE - 1); 2585 nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, 2586 GFP_KERNEL); 2587 if (!nm_i->nat_bits) 2588 return -ENOMEM; 2589 2590 nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - 2591 nm_i->nat_bits_blocks; 2592 for (i = 0; i < nm_i->nat_bits_blocks; i++) { 2593 struct page *page = get_meta_page(sbi, nat_bits_addr++); 2594 2595 memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), 2596 page_address(page), F2FS_BLKSIZE); 2597 f2fs_put_page(page, 1); 2598 } 2599 2600 cp_ver |= (cur_cp_crc(ckpt) << 32); 2601 if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) { 2602 disable_nat_bits(sbi, true); 2603 return 0; 2604 } 2605 2606 nm_i->full_nat_bits = nm_i->nat_bits + 8; 2607 nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; 2608 2609 f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint"); 2610 return 0; 2611 } 2612 2613 static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) 2614 { 2615 struct f2fs_nm_info *nm_i = NM_I(sbi); 2616 unsigned int i = 0; 2617 nid_t nid, last_nid; 2618 2619 if (!enabled_nat_bits(sbi, NULL)) 2620 return; 2621 2622 for (i = 0; i < nm_i->nat_blocks; i++) { 2623 i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); 2624 if (i >= nm_i->nat_blocks) 2625 break; 2626 2627 __set_bit_le(i, nm_i->nat_block_bitmap); 2628 2629 nid = i * NAT_ENTRY_PER_BLOCK; 2630 last_nid = nid + NAT_ENTRY_PER_BLOCK; 2631 2632 spin_lock(&NM_I(sbi)->nid_list_lock); 2633 for (; nid < last_nid; nid++) 2634 update_free_nid_bitmap(sbi, nid, true, true); 2635 spin_unlock(&NM_I(sbi)->nid_list_lock); 2636 } 2637 2638 for (i = 0; i < nm_i->nat_blocks; i++) { 2639 i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); 2640 if (i >= nm_i->nat_blocks) 2641 break; 2642 2643 __set_bit_le(i, nm_i->nat_block_bitmap); 2644 } 2645 } 2646 2647 static int init_node_manager(struct f2fs_sb_info *sbi) 2648 { 2649 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); 2650 struct f2fs_nm_info *nm_i = NM_I(sbi); 2651 unsigned char *version_bitmap; 2652 unsigned int nat_segs; 2653 int err; 2654 2655 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); 2656 2657 /* segment_count_nat includes pair segment so divide to 2. 
*/ 2658 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 2659 nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 2660 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks; 2661 2662 /* not used nids: 0, node, meta, (and root counted as valid node) */ 2663 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - 2664 F2FS_RESERVED_NODE_NUM; 2665 nm_i->nid_cnt[FREE_NID] = 0; 2666 nm_i->nid_cnt[PREALLOC_NID] = 0; 2667 nm_i->nat_cnt = 0; 2668 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 2669 nm_i->ra_nid_pages = DEF_RA_NID_PAGES; 2670 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; 2671 2672 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 2673 INIT_LIST_HEAD(&nm_i->free_nid_list); 2674 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); 2675 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); 2676 INIT_LIST_HEAD(&nm_i->nat_entries); 2677 2678 mutex_init(&nm_i->build_lock); 2679 spin_lock_init(&nm_i->nid_list_lock); 2680 init_rwsem(&nm_i->nat_tree_lock); 2681 2682 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 2683 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); 2684 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); 2685 if (!version_bitmap) 2686 return -EFAULT; 2687 2688 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, 2689 GFP_KERNEL); 2690 if (!nm_i->nat_bitmap) 2691 return -ENOMEM; 2692 2693 err = __get_nat_bitmaps(sbi); 2694 if (err) 2695 return err; 2696 2697 #ifdef CONFIG_F2FS_CHECK_FS 2698 nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size, 2699 GFP_KERNEL); 2700 if (!nm_i->nat_bitmap_mir) 2701 return -ENOMEM; 2702 #endif 2703 2704 return 0; 2705 } 2706 2707 static int init_free_nid_cache(struct f2fs_sb_info *sbi) 2708 { 2709 struct f2fs_nm_info *nm_i = NM_I(sbi); 2710 2711 nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks * 2712 NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); 2713 if (!nm_i->free_nid_bitmap) 2714 return -ENOMEM; 2715 2716 nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8, 2717 GFP_KERNEL); 2718 if (!nm_i->nat_block_bitmap) 2719 return -ENOMEM; 2720 2721 nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks * 2722 sizeof(unsigned short), GFP_KERNEL); 2723 if (!nm_i->free_nid_count) 2724 return -ENOMEM; 2725 return 0; 2726 } 2727 2728 int build_node_manager(struct f2fs_sb_info *sbi) 2729 { 2730 int err; 2731 2732 sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); 2733 if (!sbi->nm_info) 2734 return -ENOMEM; 2735 2736 err = init_node_manager(sbi); 2737 if (err) 2738 return err; 2739 2740 err = init_free_nid_cache(sbi); 2741 if (err) 2742 return err; 2743 2744 /* load free nid status from nat_bits table */ 2745 load_free_nid_bitmap(sbi); 2746 2747 build_free_nids(sbi, true, true); 2748 return 0; 2749 } 2750 2751 void destroy_node_manager(struct f2fs_sb_info *sbi) 2752 { 2753 struct f2fs_nm_info *nm_i = NM_I(sbi); 2754 struct free_nid *i, *next_i; 2755 struct nat_entry *natvec[NATVEC_SIZE]; 2756 struct nat_entry_set *setvec[SETVEC_SIZE]; 2757 nid_t nid = 0; 2758 unsigned int found; 2759 2760 if (!nm_i) 2761 return; 2762 2763 /* destroy free nid list */ 2764 spin_lock(&nm_i->nid_list_lock); 2765 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 2766 __remove_free_nid(sbi, i, FREE_NID); 2767 spin_unlock(&nm_i->nid_list_lock); 2768 kmem_cache_free(free_nid_slab, i); 2769 spin_lock(&nm_i->nid_list_lock); 2770 } 2771 f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]); 2772 f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]); 2773 f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list)); 2774 
spin_unlock(&nm_i->nid_list_lock); 2775 2776 /* destroy nat cache */ 2777 down_write(&nm_i->nat_tree_lock); 2778 while ((found = __gang_lookup_nat_cache(nm_i, 2779 nid, NATVEC_SIZE, natvec))) { 2780 unsigned idx; 2781 2782 nid = nat_get_nid(natvec[found - 1]) + 1; 2783 for (idx = 0; idx < found; idx++) 2784 __del_from_nat_cache(nm_i, natvec[idx]); 2785 } 2786 f2fs_bug_on(sbi, nm_i->nat_cnt); 2787 2788 /* destroy nat set cache */ 2789 nid = 0; 2790 while ((found = __gang_lookup_nat_set(nm_i, 2791 nid, SETVEC_SIZE, setvec))) { 2792 unsigned idx; 2793 2794 nid = setvec[found - 1]->set + 1; 2795 for (idx = 0; idx < found; idx++) { 2796 /* entry_cnt is not zero when a cp_error has occurred */ 2797 f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list)); 2798 radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set); 2799 kmem_cache_free(nat_entry_set_slab, setvec[idx]); 2800 } 2801 } 2802 up_write(&nm_i->nat_tree_lock); 2803 2804 kvfree(nm_i->nat_block_bitmap); 2805 kvfree(nm_i->free_nid_bitmap); 2806 kvfree(nm_i->free_nid_count); 2807 2808 kfree(nm_i->nat_bitmap); 2809 kfree(nm_i->nat_bits); 2810 #ifdef CONFIG_F2FS_CHECK_FS 2811 kfree(nm_i->nat_bitmap_mir); 2812 #endif 2813 sbi->nm_info = NULL; 2814 kfree(nm_i); 2815 } 2816 2817 int __init create_node_manager_caches(void) 2818 { 2819 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2820 sizeof(struct nat_entry)); 2821 if (!nat_entry_slab) 2822 goto fail; 2823 2824 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2825 sizeof(struct free_nid)); 2826 if (!free_nid_slab) 2827 goto destroy_nat_entry; 2828 2829 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", 2830 sizeof(struct nat_entry_set)); 2831 if (!nat_entry_set_slab) 2832 goto destroy_free_nid; 2833 return 0; 2834 2835 destroy_free_nid: 2836 kmem_cache_destroy(free_nid_slab); 2837 destroy_nat_entry: 2838 kmem_cache_destroy(nat_entry_slab); 2839 fail: 2840 return -ENOMEM; 2841 } 2842 2843 void destroy_node_manager_caches(void) 2844 { 2845 kmem_cache_destroy(nat_entry_set_slab); 2846 kmem_cache_destroy(free_nid_slab); 2847 kmem_cache_destroy(nat_entry_slab); 2848 } 2849
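/*
 * Lifecycle overview (editorial sketch, not called from this file): the
 * slab caches above are created once when the module loads, and the
 * per-superblock node manager is built at mount time.  The exact call
 * sites live in f2fs's module init code and super.c; the assumed
 * ordering is:
 *
 *	create_node_manager_caches();		- module init
 *	build_node_manager(sbi);		- mount
 *	...					- alloc_nid(), flush_nat_entries(), ...
 *	destroy_node_manager(sbi);		- unmount
 *	destroy_node_manager_caches();		- module exit
 */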