/*
 * fs/f2fs/node.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"

static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;

static void clear_node_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
	unsigned long flags;

	if (PageDirty(page)) {
		spin_lock_irqsave(&mapping->tree_lock, flags);
		radix_tree_tag_clear(&mapping->page_tree,
				page_index(page),
				PAGECACHE_TAG_DIRTY);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);

		clear_page_dirty_for_io(page);
		dec_page_count(sbi, F2FS_DIRTY_NODES);
	}
	ClearPageUptodate(page);
}

static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	pgoff_t index = current_nat_addr(sbi, nid);
	return get_meta_page(sbi, index);
}

static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *src_page;
	struct page *dst_page;
	pgoff_t src_off;
	pgoff_t dst_off;
	void *src_addr;
	void *dst_addr;
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	src_off = current_nat_addr(sbi, nid);
	dst_off = next_nat_addr(sbi, src_off);

	/* get current nat block page with lock */
	src_page = get_meta_page(sbi, src_off);

	/* Dirty src_page means that it is already the new target NAT page. */
	if (PageDirty(src_page))
		return src_page;

	dst_page = grab_meta_page(sbi, dst_off);

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	set_to_next_nat(nm_i, nid);

	return dst_page;
}

/*
 * Readahead NAT pages
 */
static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
{
	struct address_space *mapping = sbi->meta_inode->i_mapping;
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct page *page;
	pgoff_t index;
	int i;

	for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
		if (nid >= nm_i->max_nid)
			nid = 0;
		index = current_nat_addr(sbi, nid);

		page = grab_cache_page(mapping, index);
		if (!page)
			continue;
		if (f2fs_readpage(sbi, page, index, READ)) {
			f2fs_put_page(page, 1);
			continue;
		}
		page_cache_release(page);
	}
}

static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
	return radix_tree_lookup(&nm_i->nat_root, n);
}

static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
}

static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
	list_del(&e->list);
	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
	nm_i->nat_cnt--;
	kmem_cache_free(nat_entry_slab, e);
}

int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	int is_cp = 1;

	read_lock(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e && !e->checkpointed)
		is_cp = 0;
	read_unlock(&nm_i->nat_tree_lock);
	return is_cp;
}

static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
	struct nat_entry *new;

	new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
	if (!new)
		return NULL;
	if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
		kmem_cache_free(nat_entry_slab, new);
		return NULL;
	}
	memset(new, 0, sizeof(struct nat_entry));
	nat_set_nid(new, nid);
	list_add_tail(&new->list, &nm_i->nat_entries);
	nm_i->nat_cnt++;
	return new;
}

static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
						struct f2fs_nat_entry *ne)
{
	struct nat_entry *e;
retry:
	write_lock(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (!e) {
		e = grab_nat_entry(nm_i, nid);
		if (!e) {
			write_unlock(&nm_i->nat_tree_lock);
			goto retry;
		}
		nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
		nat_set_ino(e, le32_to_cpu(ne->ino));
		nat_set_version(e, ne->version);
		e->checkpointed = true;
	}
	write_unlock(&nm_i->nat_tree_lock);
}
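/*
 * Point the cached NAT entry of ni->nid at new_blkaddr, creating the cache
 * entry if it does not exist yet, and mark it dirty so that the change
 * reaches the on-disk NAT area at the next checkpoint.
 */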
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
			block_t new_blkaddr)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
retry:
	write_lock(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ni->nid);
	if (!e) {
		e = grab_nat_entry(nm_i, ni->nid);
		if (!e) {
			write_unlock(&nm_i->nat_tree_lock);
			goto retry;
		}
		e->ni = *ni;
		e->checkpointed = true;
		BUG_ON(ni->blk_addr == NEW_ADDR);
	} else if (new_blkaddr == NEW_ADDR) {
		/*
		 * when nid is reallocated,
		 * a previous nat entry may remain in the nat cache.
		 * So, reinitialize it with new information.
		 */
		e->ni = *ni;
		BUG_ON(ni->blk_addr != NULL_ADDR);
	}

	if (new_blkaddr == NEW_ADDR)
		e->checkpointed = false;

	/* sanity check */
	BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
	BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
			new_blkaddr == NULL_ADDR);
	BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
			new_blkaddr == NEW_ADDR);
	BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
			nat_get_blkaddr(e) != NULL_ADDR &&
			new_blkaddr == NEW_ADDR);

	/* increment version no. as the node is removed */
	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
		unsigned char version = nat_get_version(e);
		nat_set_version(e, inc_node_version(version));
	}

	/* change address */
	nat_set_blkaddr(e, new_blkaddr);
	__set_nat_cache_dirty(nm_i, e);
	write_unlock(&nm_i->nat_tree_lock);
}
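/*
 * Shrink the NAT cache by dropping up to nr_shrink entries from the head of
 * the nat_entries list, but only once the cache holds at least twice
 * NM_WOUT_THRESHOLD entries. Returns the unused part of the shrink budget.
 */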
static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)
		return 0;

	write_lock(&nm_i->nat_tree_lock);
	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
		struct nat_entry *ne;
		ne = list_first_entry(&nm_i->nat_entries,
					struct nat_entry, list);
		__del_from_nat_cache(nm_i, ne);
		nr_shrink--;
	}
	write_unlock(&nm_i->nat_tree_lock);
	return nr_shrink;
}

/*
 * This function always returns success
 */
void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	nid_t start_nid = START_NID(nid);
	struct f2fs_nat_block *nat_blk;
	struct page *page = NULL;
	struct f2fs_nat_entry ne;
	struct nat_entry *e;
	int i;

	ni->nid = nid;

	/* Check nat cache */
	read_lock(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		ni->ino = nat_get_ino(e);
		ni->blk_addr = nat_get_blkaddr(e);
		ni->version = nat_get_version(e);
	}
	read_unlock(&nm_i->nat_tree_lock);
	if (e)
		return;

	/* Check current segment summary */
	mutex_lock(&curseg->curseg_mutex);
	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
	if (i >= 0) {
		ne = nat_in_journal(sum, i);
		node_info_from_raw_nat(ni, &ne);
	}
	mutex_unlock(&curseg->curseg_mutex);
	if (i >= 0)
		goto cache;

	/* Fill node_info from nat page */
	page = get_current_nat_page(sbi, start_nid);
	nat_blk = (struct f2fs_nat_block *)page_address(page);
	ne = nat_blk->entries[nid - start_nid];
	node_info_from_raw_nat(ni, &ne);
	f2fs_put_page(page, 1);
cache:
	/* cache nat entry */
	cache_nat_entry(NM_I(sbi), nid, &ne);
}

/*
 * The maximum depth is four.
 * Offset[0] will have raw inode offset.
 */
static int get_node_path(long block, int offset[4], unsigned int noffset[4])
{
	const long direct_index = ADDRS_PER_INODE;
	const long direct_blks = ADDRS_PER_BLOCK;
	const long dptrs_per_blk = NIDS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
	int n = 0;
	int level = 0;

	noffset[0] = 0;

	if (block < direct_index) {
		offset[n++] = block;
		level = 0;
		goto got;
	}
	block -= direct_index;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR1_BLOCK;
		noffset[n] = 1;
		offset[n++] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR2_BLOCK;
		noffset[n] = 2;
		offset[n++] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND1_BLOCK;
		noffset[n] = 3;
		offset[n++] = block / direct_blks;
		noffset[n] = 4 + offset[n - 1];
		offset[n++] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND2_BLOCK;
		noffset[n] = 4 + dptrs_per_blk;
		offset[n++] = block / direct_blks;
		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
		offset[n++] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < dindirect_blks) {
		offset[n++] = NODE_DIND_BLOCK;
		noffset[n] = 5 + (dptrs_per_blk * 2);
		offset[n++] = block / indirect_blks;
		noffset[n] = 6 + (dptrs_per_blk * 2) +
			offset[n - 1] * (dptrs_per_blk + 1);
		offset[n++] = (block / direct_blks) % dptrs_per_blk;
		noffset[n] = 7 + (dptrs_per_blk * 2) +
			offset[n - 2] * (dptrs_per_blk + 1) +
			offset[n - 1];
		offset[n++] = block % direct_blks;
		level = 3;
		goto got;
	} else {
		BUG();
	}
got:
	return level;
}
/*
 * Caller should call f2fs_put_dnode(dn).
 */
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct page *npage[4];
	struct page *parent;
	int offset[4];
	unsigned int noffset[4];
	nid_t nids[4];
	int level, i;
	int err = 0;

	level = get_node_path(index, offset, noffset);

	nids[0] = dn->inode->i_ino;
	npage[0] = get_node_page(sbi, nids[0]);
	if (IS_ERR(npage[0]))
		return PTR_ERR(npage[0]);

	parent = npage[0];
	nids[1] = get_nid(parent, offset[0], true);
	dn->inode_page = npage[0];
	dn->inode_page_locked = true;

	/* get indirect or direct nodes */
	for (i = 1; i <= level; i++) {
		bool done = false;

		if (!nids[i] && !ro) {
			mutex_lock_op(sbi, NODE_NEW);

			/* alloc new node */
			if (!alloc_nid(sbi, &(nids[i]))) {
				mutex_unlock_op(sbi, NODE_NEW);
				err = -ENOSPC;
				goto release_pages;
			}

			dn->nid = nids[i];
			npage[i] = new_node_page(dn, noffset[i]);
			if (IS_ERR(npage[i])) {
				alloc_nid_failed(sbi, nids[i]);
				mutex_unlock_op(sbi, NODE_NEW);
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}

			set_nid(parent, offset[i - 1], nids[i], i == 1);
			alloc_nid_done(sbi, nids[i]);
			mutex_unlock_op(sbi, NODE_NEW);
			done = true;
		} else if (ro && i == level && level > 1) {
			npage[i] = get_node_page_ra(parent, offset[i - 1]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}
			done = true;
		}
		if (i == 1) {
			dn->inode_page_locked = false;
			unlock_page(parent);
		} else {
			f2fs_put_page(parent, 1);
		}

		if (!done) {
			npage[i] = get_node_page(sbi, nids[i]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				f2fs_put_page(npage[0], 0);
				goto release_out;
			}
		}
		if (i < level) {
			parent = npage[i];
			nids[i + 1] = get_nid(parent, offset[i], false);
		}
	}
	dn->nid = nids[level];
	dn->ofs_in_node = offset[level];
	dn->node_page = npage[level];
	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
	return 0;

release_pages:
	f2fs_put_page(parent, 1);
	if (i > 1)
		f2fs_put_page(npage[0], 0);
release_out:
	dn->inode_page = NULL;
	dn->node_page = NULL;
	return err;
}

static void truncate_node(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct node_info ni;

	get_node_info(sbi, dn->nid, &ni);
	BUG_ON(ni.blk_addr == NULL_ADDR);

	if (ni.blk_addr != NULL_ADDR)
		invalidate_blocks(sbi, ni.blk_addr);

	/* Deallocate node address */
	dec_valid_node_count(sbi, dn->inode, 1);
	set_node_addr(sbi, &ni, NULL_ADDR);

	if (dn->nid == dn->inode->i_ino) {
		remove_orphan_inode(sbi, dn->nid);
		dec_valid_inode_count(sbi);
	} else {
		sync_inode_page(dn);
	}

	clear_node_page_dirty(dn->node_page);
	F2FS_SET_SB_DIRT(sbi);

	f2fs_put_page(dn->node_page, 1);
	dn->node_page = NULL;
}

static int truncate_dnode(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct page *page;

	if (dn->nid == 0)
		return 1;

	/* get direct node */
	page = get_node_page(sbi, dn->nid);
	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
		return 1;
	else if (IS_ERR(page))
		return PTR_ERR(page);

	/* Make dnode_of_data for parameter */
	dn->node_page = page;
	dn->ofs_in_node = 0;
	truncate_data_blocks(dn);
	truncate_node(dn);
	return 1;
}
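/*
 * Recursively free the node blocks below the (in)direct node dn->nid,
 * starting from child slot 'ofs' at the given depth. Returns the number of
 * node blocks freed (NIDS_PER_BLOCK + 1 when the whole subtree, including
 * this node, is gone) or a negative error code.
 */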
static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
						int ofs, int depth)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct dnode_of_data rdn = *dn;
	struct page *page;
	struct f2fs_node *rn;
	nid_t child_nid;
	unsigned int child_nofs;
	int freed = 0;
	int i, ret;

	if (dn->nid == 0)
		return NIDS_PER_BLOCK + 1;

	page = get_node_page(sbi, dn->nid);
	if (IS_ERR(page))
		return PTR_ERR(page);

	rn = (struct f2fs_node *)page_address(page);
	if (depth < 3) {
		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0)
				continue;
			rdn.nid = child_nid;
			ret = truncate_dnode(&rdn);
			if (ret < 0)
				goto out_err;
			set_nid(page, i, 0, false);
		}
	} else {
		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0) {
				child_nofs += NIDS_PER_BLOCK + 1;
				continue;
			}
			rdn.nid = child_nid;
			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
			if (ret == (NIDS_PER_BLOCK + 1)) {
				set_nid(page, i, 0, false);
				child_nofs += ret;
			} else if (ret < 0 && ret != -ENOENT) {
				goto out_err;
			}
		}
		freed = child_nofs;
	}

	if (!ofs) {
		/* remove current indirect node */
		dn->node_page = page;
		truncate_node(dn);
		freed++;
	} else {
		f2fs_put_page(page, 1);
	}
	return freed;

out_err:
	f2fs_put_page(page, 1);
	return ret;
}

static int truncate_partial_nodes(struct dnode_of_data *dn,
			struct f2fs_inode *ri, int *offset, int depth)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct page *pages[2];
	nid_t nid[3];
	nid_t child_nid;
	int err = 0;
	int i;
	int idx = depth - 2;

	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
	if (!nid[0])
		return 0;

	/* get indirect nodes in the path */
	for (i = 0; i < depth - 1; i++) {
		/* reference count will be increased */
		pages[i] = get_node_page(sbi, nid[i]);
		if (IS_ERR(pages[i])) {
			depth = i + 1;
			err = PTR_ERR(pages[i]);
			goto fail;
		}
		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
	}

	/* free direct nodes linked to a partial indirect node */
	for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
		child_nid = get_nid(pages[idx], i, false);
		if (!child_nid)
			continue;
		dn->nid = child_nid;
		err = truncate_dnode(dn);
		if (err < 0)
			goto fail;
		set_nid(pages[idx], i, 0, false);
	}

	if (offset[depth - 1] == 0) {
		dn->node_page = pages[idx];
		dn->nid = nid[idx];
		truncate_node(dn);
	} else {
		f2fs_put_page(pages[idx], 1);
	}
	offset[idx]++;
	offset[depth - 1] = 0;
fail:
	for (i = depth - 3; i >= 0; i--)
		f2fs_put_page(pages[i], 1);
	return err;
}
/*
 * All the block addresses of data and nodes should be nullified.
 */
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	int err = 0, cont = 1;
	int level, offset[4], noffset[4];
	unsigned int nofs;
	struct f2fs_node *rn;
	struct dnode_of_data dn;
	struct page *page;

	level = get_node_path(from, offset, noffset);

	page = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(page))
		return PTR_ERR(page);

	set_new_dnode(&dn, inode, page, NULL, 0);
	unlock_page(page);

	rn = page_address(page);
	switch (level) {
	case 0:
	case 1:
		nofs = noffset[1];
		break;
	case 2:
		nofs = noffset[1];
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, &rn->i, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		nofs += 1 + NIDS_PER_BLOCK;
		break;
	case 3:
		nofs = 5 + 2 * NIDS_PER_BLOCK;
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, &rn->i, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		break;
	default:
		BUG();
	}

skip_partial:
	while (cont) {
		dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
		switch (offset[0]) {
		case NODE_DIR1_BLOCK:
		case NODE_DIR2_BLOCK:
			err = truncate_dnode(&dn);
			break;

		case NODE_IND1_BLOCK:
		case NODE_IND2_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 2);
			break;

		case NODE_DIND_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 3);
			cont = 0;
			break;

		default:
			BUG();
		}
		if (err < 0 && err != -ENOENT)
			goto fail;
		if (offset[1] == 0 &&
				rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
			lock_page(page);
			wait_on_page_writeback(page);
			rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
			set_page_dirty(page);
			unlock_page(page);
		}
		offset[1] = 0;
		offset[0]++;
		nofs += err;
	}
fail:
	f2fs_put_page(page, 0);
	return err > 0 ? 0 : err;
}
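/*
 * Release the inode block (and the xattr node block, if any) of an inode
 * whose data and node blocks have already been truncated, so that only the
 * inode block itself is left.
 */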
int remove_inode_page(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *page;
	nid_t ino = inode->i_ino;
	struct dnode_of_data dn;

	mutex_lock_op(sbi, NODE_TRUNC);
	page = get_node_page(sbi, ino);
	if (IS_ERR(page)) {
		mutex_unlock_op(sbi, NODE_TRUNC);
		return PTR_ERR(page);
	}

	if (F2FS_I(inode)->i_xattr_nid) {
		nid_t nid = F2FS_I(inode)->i_xattr_nid;
		struct page *npage = get_node_page(sbi, nid);

		if (IS_ERR(npage)) {
			mutex_unlock_op(sbi, NODE_TRUNC);
			return PTR_ERR(npage);
		}

		F2FS_I(inode)->i_xattr_nid = 0;
		set_new_dnode(&dn, inode, page, npage, nid);
		dn.inode_page_locked = 1;
		truncate_node(&dn);
	}
	if (inode->i_blocks == 1) {
		/* internally call f2fs_put_page() */
		set_new_dnode(&dn, inode, page, page, ino);
		truncate_node(&dn);
	} else if (inode->i_blocks == 0) {
		struct node_info ni;
		get_node_info(sbi, inode->i_ino, &ni);

		/* called after f2fs_new_inode() has failed */
		BUG_ON(ni.blk_addr != NULL_ADDR);
		f2fs_put_page(page, 1);
	} else {
		BUG();
	}
	mutex_unlock_op(sbi, NODE_TRUNC);
	return 0;
}

int new_inode_page(struct inode *inode, struct dentry *dentry)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *page;
	struct dnode_of_data dn;

	/* allocate inode page for new inode */
	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
	mutex_lock_op(sbi, NODE_NEW);
	page = new_node_page(&dn, 0);
	init_dent_inode(dentry, page);
	mutex_unlock_op(sbi, NODE_NEW);
	if (IS_ERR(page))
		return PTR_ERR(page);
	f2fs_put_page(page, 1);
	return 0;
}
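/*
 * Allocate a fresh node page for dn->nid at node offset 'ofs': grab the page,
 * fill its footer, and register it in the NAT cache with NEW_ADDR so that an
 * on-disk block is only assigned when the page is written back.
 */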
struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct node_info old_ni, new_ni;
	struct page *page;
	int err;

	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
		return ERR_PTR(-EPERM);

	page = grab_cache_page(mapping, dn->nid);
	if (!page)
		return ERR_PTR(-ENOMEM);

	get_node_info(sbi, dn->nid, &old_ni);

	SetPageUptodate(page);
	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);

	/* Reinitialize old_ni with new node page */
	BUG_ON(old_ni.blk_addr != NULL_ADDR);
	new_ni = old_ni;
	new_ni.ino = dn->inode->i_ino;

	if (!inc_valid_node_count(sbi, dn->inode, 1)) {
		err = -ENOSPC;
		goto fail;
	}
	set_node_addr(sbi, &new_ni, NEW_ADDR);

	dn->node_page = page;
	sync_inode_page(dn);
	set_page_dirty(page);
	set_cold_node(dn->inode, page);
	if (ofs == 0)
		inc_valid_inode_count(sbi);

	return page;

fail:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

static int read_node_page(struct page *page, int type)
{
	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
	struct node_info ni;

	get_node_info(sbi, page->index, &ni);

	if (ni.blk_addr == NULL_ADDR)
		return -ENOENT;
	return f2fs_readpage(sbi, page, ni.blk_addr, type);
}

/*
 * Readahead a node page
 */
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct page *apage;

	apage = find_get_page(mapping, nid);
	if (apage && PageUptodate(apage))
		goto release_out;
	f2fs_put_page(apage, 0);

	apage = grab_cache_page(mapping, nid);
	if (!apage)
		return;

	if (read_node_page(apage, READA))
		goto unlock_out;

	page_cache_release(apage);
	return;

unlock_out:
	unlock_page(apage);
release_out:
	page_cache_release(apage);
}

struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
	int err;
	struct page *page;
	struct address_space *mapping = sbi->node_inode->i_mapping;

	page = grab_cache_page(mapping, nid);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, READ_SYNC);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}

	BUG_ON(nid != nid_of_node(page));
	mark_page_accessed(page);
	return page;
}

/*
 * Return a locked page for the desired node page.
 * And, readahead MAX_RA_NODE number of node pages.
 */
struct page *get_node_page_ra(struct page *parent, int start)
{
	struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
	struct address_space *mapping = sbi->node_inode->i_mapping;
	int i, end;
	int err = 0;
	nid_t nid;
	struct page *page;

	/* First, try getting the desired direct node. */
	nid = get_nid(parent, start, false);
	if (!nid)
		return ERR_PTR(-ENOENT);

	page = find_get_page(mapping, nid);
	if (page && PageUptodate(page))
		goto page_hit;
	f2fs_put_page(page, 0);

repeat:
	page = grab_cache_page(mapping, nid);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, READA);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}

	/* Then, try readahead for siblings of the desired node */
	end = start + MAX_RA_NODE;
	end = min(end, NIDS_PER_BLOCK);
	for (i = start + 1; i < end; i++) {
		nid = get_nid(parent, i, false);
		if (!nid)
			continue;
		ra_node_page(sbi, nid);
	}

page_hit:
	lock_page(page);
	if (PageError(page)) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}

	/* Has the page been truncated? */
	if (page->mapping != mapping) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	return page;
}

void sync_inode_page(struct dnode_of_data *dn)
{
	if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
		update_inode(dn->inode, dn->node_page);
	} else if (dn->inode_page) {
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
		update_inode(dn->inode, dn->inode_page);
		if (!dn->inode_page_locked)
			unlock_page(dn->inode_page);
	} else {
		f2fs_write_inode(dn->inode, NULL);
	}
}
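/*
 * Write back dirty node pages in three steps: indirect nodes first, then
 * dentry dnodes, then file dnodes. When 'ino' is non-zero (the fsync path),
 * only pages that belong to that inode are written and fsync/dentry marks
 * are set on them. Returns the number of dnode pages written for fsync.
 */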
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
					struct writeback_control *wbc)
{
	struct address_space *mapping = sbi->node_inode->i_mapping;
	pgoff_t index, end;
	struct pagevec pvec;
	int step = ino ? 2 : 0;
	int nwritten = 0, wrote = 0;

	pagevec_init(&pvec, 0);

next_step:
	index = 0;
	end = LONG_MAX;

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * flushing sequence with step:
			 * 0. indirect nodes
			 * 1. dentry dnodes
			 * 2. file dnodes
			 */
			if (step == 0 && IS_DNODE(page))
				continue;
			if (step == 1 && (!IS_DNODE(page) ||
						is_cold_node(page)))
				continue;
			if (step == 2 && (!IS_DNODE(page) ||
						!is_cold_node(page)))
				continue;

			/*
			 * In fsync mode,
			 * we should not skip writing node pages.
			 */
			if (ino && ino_of_node(page) == ino)
				lock_page(page);
			else if (!trylock_page(page))
				continue;

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino && ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			/* called by fsync() */
			if (ino && IS_DNODE(page)) {
				int mark = !is_checkpointed_node(sbi, ino);
				set_fsync_mark(page, 1);
				if (IS_INODE(page))
					set_dentry_mark(page, mark);
				nwritten++;
			} else {
				set_fsync_mark(page, 0);
				set_dentry_mark(page, 0);
			}
			mapping->a_ops->writepage(page, wbc);
			wrote++;

			if (--wbc->nr_to_write == 0)
				break;
		}
		pagevec_release(&pvec);
		cond_resched();

		if (wbc->nr_to_write == 0) {
			step = 2;
			break;
		}
	}

	if (step < 2) {
		step++;
		goto next_step;
	}

	if (wrote)
		f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);

	return nwritten;
}

static int f2fs_write_node_page(struct page *page,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
	nid_t nid;
	unsigned int nofs;
	block_t new_addr;
	struct node_info ni;

	if (wbc->for_reclaim) {
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		wbc->pages_skipped++;
		set_page_dirty(page);
		return AOP_WRITEPAGE_ACTIVATE;
	}

	wait_on_page_writeback(page);

	mutex_lock_op(sbi, NODE_WRITE);

	/* get old block addr of this node page */
	nid = nid_of_node(page);
	nofs = ofs_of_node(page);
	BUG_ON(page->index != nid);

	get_node_info(sbi, nid, &ni);

	/* This page is already truncated */
	if (ni.blk_addr == NULL_ADDR) {
		/* keep dirty accounting and locking balanced before bailing out */
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		mutex_unlock_op(sbi, NODE_WRITE);
		unlock_page(page);
		return 0;
	}

	set_page_writeback(page);

	/* insert node offset */
	write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
	set_node_addr(sbi, &ni, new_addr);
	dec_page_count(sbi, F2FS_DIRTY_NODES);

	mutex_unlock_op(sbi, NODE_WRITE);
	unlock_page(page);
	return 0;
}

static int f2fs_write_node_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
	struct block_device *bdev = sbi->sb->s_bdev;
	long nr_to_write = wbc->nr_to_write;

	if (wbc->for_kupdate)
		return 0;

	if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
		return 0;

	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
		write_checkpoint(sbi, false, false);
		return 0;
	}

	/* if mounting has failed, skip writing node pages */
	wbc->nr_to_write = bio_get_nr_vecs(bdev);
	sync_node_pages(sbi, 0, wbc);
	wbc->nr_to_write = nr_to_write -
		(bio_get_nr_vecs(bdev) - wbc->nr_to_write);
	return 0;
}
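/*
 * Mark a node page dirty and account it in the F2FS_DIRTY_NODES counter;
 * returns 1 only when the page was not dirty before.
 */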
static int f2fs_set_node_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);

	SetPageUptodate(page);
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inc_page_count(sbi, F2FS_DIRTY_NODES);
		SetPagePrivate(page);
		return 1;
	}
	return 0;
}

static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	if (PageDirty(page))
		dec_page_count(sbi, F2FS_DIRTY_NODES);
	ClearPagePrivate(page);
}

static int f2fs_release_node_page(struct page *page, gfp_t wait)
{
	ClearPagePrivate(page);
	return 0;
}

/*
 * Structure of the f2fs node operations
 */
const struct address_space_operations f2fs_node_aops = {
	.writepage	= f2fs_write_node_page,
	.writepages	= f2fs_write_node_pages,
	.set_page_dirty	= f2fs_set_node_page_dirty,
	.invalidatepage	= f2fs_invalidate_node_page,
	.releasepage	= f2fs_release_node_page,
};

static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
{
	struct list_head *this;
	struct free_nid *i = NULL;
	list_for_each(this, head) {
		i = list_entry(this, struct free_nid, list);
		if (i->nid == n)
			break;
		i = NULL;
	}
	return i;
}

static void __del_from_free_nid_list(struct free_nid *i)
{
	list_del(&i->list);
	kmem_cache_free(free_nid_slab, i);
}

static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
	struct free_nid *i;

	if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
		return 0;
retry:
	i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
	if (!i) {
		cond_resched();
		goto retry;
	}
	i->nid = nid;
	i->state = NID_NEW;

	spin_lock(&nm_i->free_nid_list_lock);
	if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
		spin_unlock(&nm_i->free_nid_list_lock);
		kmem_cache_free(free_nid_slab, i);
		return 0;
	}
	list_add_tail(&i->list, &nm_i->free_nid_list);
	nm_i->fcnt++;
	spin_unlock(&nm_i->free_nid_list_lock);
	return 1;
}

static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
	struct free_nid *i;
	spin_lock(&nm_i->free_nid_list_lock);
	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
	if (i && i->state == NID_NEW) {
		__del_from_free_nid_list(i);
		nm_i->fcnt--;
	}
	spin_unlock(&nm_i->free_nid_list_lock);
}

static int scan_nat_page(struct f2fs_nm_info *nm_i,
			struct page *nat_page, nid_t start_nid)
{
	struct f2fs_nat_block *nat_blk = page_address(nat_page);
	block_t blk_addr;
	int fcnt = 0;
	int i;

	/* nid 0 should not be used */
	if (start_nid == 0)
		++start_nid;

	i = start_nid % NAT_ENTRY_PER_BLOCK;

	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
		BUG_ON(blk_addr == NEW_ADDR);
		if (blk_addr == NULL_ADDR)
			fcnt += add_free_nid(nm_i, start_nid);
	}
	return fcnt;
}
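/*
 * Refill nm_i->free_nid_list by scanning NAT blocks from next_scan_nid (and
 * the NAT journal of the hot data curseg), stopping once more than
 * MAX_FREE_NIDS candidates are collected or the scan wraps around, and then
 * dropping candidates that the NAT cache shows as already in use.
 */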
static void build_free_nids(struct f2fs_sb_info *sbi)
{
	struct free_nid *fnid, *next_fnid;
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	nid_t nid = 0;
	bool is_cycled = false;
	int fcnt = 0;
	int i;

	nid = nm_i->next_scan_nid;
	nm_i->init_scan_nid = nid;

	ra_nat_pages(sbi, nid);

	while (1) {
		struct page *page = get_current_nat_page(sbi, nid);

		fcnt += scan_nat_page(nm_i, page, nid);
		f2fs_put_page(page, 1);

		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));

		if (nid >= nm_i->max_nid) {
			nid = 0;
			is_cycled = true;
		}
		if (fcnt > MAX_FREE_NIDS)
			break;
		if (is_cycled && nm_i->init_scan_nid <= nid)
			break;
	}

	nm_i->next_scan_nid = nid;

	/* find free nids from current sum_pages */
	mutex_lock(&curseg->curseg_mutex);
	for (i = 0; i < nats_in_cursum(sum); i++) {
		block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
		nid = le32_to_cpu(nid_in_journal(sum, i));
		if (addr == NULL_ADDR)
			add_free_nid(nm_i, nid);
		else
			remove_free_nid(nm_i, nid);
	}
	mutex_unlock(&curseg->curseg_mutex);

	/* remove nids that are already in use according to the nat cache */
	list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
		struct nat_entry *ne;

		read_lock(&nm_i->nat_tree_lock);
		ne = __lookup_nat_cache(nm_i, fnid->nid);
		if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
			remove_free_nid(nm_i, fnid->nid);
		read_unlock(&nm_i->nat_tree_lock);
	}
}

/*
 * If this function returns success, the caller can obtain a new nid
 * from the second parameter of this function.
 * The returned nid can be used as an ino as well as a nid when an inode
 * is created.
 */
bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i = NULL;
	struct list_head *this;
retry:
	mutex_lock(&nm_i->build_lock);
	if (!nm_i->fcnt) {
		/* scan NAT in order to build free nid list */
		build_free_nids(sbi);
		if (!nm_i->fcnt) {
			mutex_unlock(&nm_i->build_lock);
			return false;
		}
	}
	mutex_unlock(&nm_i->build_lock);

	/*
	 * We check fcnt again because the previous check is racy:
	 * we didn't hold free_nid_list_lock, so another thread
	 * could have consumed all of the free nids.
	 */
	spin_lock(&nm_i->free_nid_list_lock);
	if (!nm_i->fcnt) {
		spin_unlock(&nm_i->free_nid_list_lock);
		goto retry;
	}

	BUG_ON(list_empty(&nm_i->free_nid_list));
	list_for_each(this, &nm_i->free_nid_list) {
		i = list_entry(this, struct free_nid, list);
		if (i->state == NID_NEW)
			break;
	}

	BUG_ON(i->state != NID_NEW);
	*nid = i->nid;
	i->state = NID_ALLOC;
	nm_i->fcnt--;
	spin_unlock(&nm_i->free_nid_list_lock);
	return true;
}

/*
 * alloc_nid() should be called prior to this function.
 */
void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;

	spin_lock(&nm_i->free_nid_list_lock);
	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
	if (i) {
		BUG_ON(i->state != NID_ALLOC);
		__del_from_free_nid_list(i);
	}
	spin_unlock(&nm_i->free_nid_list_lock);
}
/*
 * alloc_nid() should be called prior to this function.
 */
void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
	alloc_nid_done(sbi, nid);
	add_free_nid(NM_I(sbi), nid);
}

void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
		struct f2fs_summary *sum, struct node_info *ni,
		block_t new_blkaddr)
{
	rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
	set_node_addr(sbi, ni, new_blkaddr);
	clear_node_page_dirty(page);
}

int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct f2fs_node *src, *dst;
	nid_t ino = ino_of_node(page);
	struct node_info old_ni, new_ni;
	struct page *ipage;

	ipage = grab_cache_page(mapping, ino);
	if (!ipage)
		return -ENOMEM;

	/* Should not use this inode from free nid list */
	remove_free_nid(NM_I(sbi), ino);

	get_node_info(sbi, ino, &old_ni);
	SetPageUptodate(ipage);
	fill_node_footer(ipage, ino, ino, 0, true);

	src = (struct f2fs_node *)page_address(page);
	dst = (struct f2fs_node *)page_address(ipage);

	memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
	dst->i.i_size = 0;
	dst->i.i_blocks = cpu_to_le64(1);
	dst->i.i_links = cpu_to_le32(1);
	dst->i.i_xattr_nid = 0;

	new_ni = old_ni;
	new_ni.ino = ino;

	set_node_addr(sbi, &new_ni, NEW_ADDR);
	inc_valid_inode_count(sbi);

	f2fs_put_page(ipage, 1);
	return 0;
}
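/*
 * Rebuild the node summary entries of segment 'segno' by reading every block
 * in the segment and recording each node block's nid in 'sum'.
 */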
int restore_node_summary(struct f2fs_sb_info *sbi,
			unsigned int segno, struct f2fs_summary_block *sum)
{
	struct f2fs_node *rn;
	struct f2fs_summary *sum_entry;
	struct page *page;
	block_t addr;
	int i, last_offset;

	/* allocate a temporary page to read node blocks into */
	page = alloc_page(GFP_NOFS | __GFP_ZERO);
	if (!page)
		return -ENOMEM;
	lock_page(page);

	/* scan the node segment */
	last_offset = sbi->blocks_per_seg;
	addr = START_BLOCK(sbi, segno);
	sum_entry = &sum->entries[0];

	for (i = 0; i < last_offset; i++, sum_entry++) {
		if (f2fs_readpage(sbi, page, addr, READ_SYNC))
			goto out;

		rn = (struct f2fs_node *)page_address(page);
		sum_entry->nid = rn->footer.nid;
		sum_entry->version = 0;
		sum_entry->ofs_in_node = 0;
		addr++;

		/*
		 * In order to read the next node page,
		 * we must clear the PageUptodate flag.
		 */
		ClearPageUptodate(page);
	}
out:
	unlock_page(page);
	__free_pages(page, 0);
	return 0;
}

static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	int i;

	mutex_lock(&curseg->curseg_mutex);

	if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
		mutex_unlock(&curseg->curseg_mutex);
		return false;
	}

	for (i = 0; i < nats_in_cursum(sum); i++) {
		struct nat_entry *ne;
		struct f2fs_nat_entry raw_ne;
		nid_t nid = le32_to_cpu(nid_in_journal(sum, i));

		raw_ne = nat_in_journal(sum, i);
retry:
		write_lock(&nm_i->nat_tree_lock);
		ne = __lookup_nat_cache(nm_i, nid);
		if (ne) {
			__set_nat_cache_dirty(nm_i, ne);
			write_unlock(&nm_i->nat_tree_lock);
			continue;
		}
		ne = grab_nat_entry(nm_i, nid);
		if (!ne) {
			write_unlock(&nm_i->nat_tree_lock);
			goto retry;
		}
		nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
		nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
		nat_set_version(ne, raw_ne.version);
		__set_nat_cache_dirty(nm_i, ne);
		write_unlock(&nm_i->nat_tree_lock);
	}
	update_nats_in_cursum(sum, -i);
	mutex_unlock(&curseg->curseg_mutex);
	return true;
}
/*
 * This function is called during the checkpointing process.
 */
void flush_nat_entries(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	struct list_head *cur, *n;
	struct page *page = NULL;
	struct f2fs_nat_block *nat_blk = NULL;
	nid_t start_nid = 0, end_nid = 0;
	bool flushed;

	flushed = flush_nats_in_journal(sbi);

	if (!flushed)
		mutex_lock(&curseg->curseg_mutex);

	/* 1) flush dirty nat caches */
	list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
		struct nat_entry *ne;
		nid_t nid;
		struct f2fs_nat_entry raw_ne;
		int offset = -1;
		block_t old_blkaddr, new_blkaddr;

		ne = list_entry(cur, struct nat_entry, list);
		nid = nat_get_nid(ne);

		if (nat_get_blkaddr(ne) == NEW_ADDR)
			continue;
		if (flushed)
			goto to_nat_page;

		/* if there is room for nat entries in the curseg summary page */
		offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
		if (offset >= 0) {
			raw_ne = nat_in_journal(sum, offset);
			old_blkaddr = le32_to_cpu(raw_ne.block_addr);
			goto flush_now;
		}
to_nat_page:
		if (!page || (start_nid > nid || nid > end_nid)) {
			if (page) {
				f2fs_put_page(page, 1);
				page = NULL;
			}
			start_nid = START_NID(nid);
			end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;

			/*
			 * get nat block with dirty flag, increased reference
			 * count, mapped and locked
			 */
			page = get_next_nat_page(sbi, start_nid);
			nat_blk = page_address(page);
		}

		BUG_ON(!nat_blk);
		raw_ne = nat_blk->entries[nid - start_nid];
		old_blkaddr = le32_to_cpu(raw_ne.block_addr);
flush_now:
		new_blkaddr = nat_get_blkaddr(ne);

		raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
		raw_ne.block_addr = cpu_to_le32(new_blkaddr);
		raw_ne.version = nat_get_version(ne);

		if (offset < 0) {
			nat_blk->entries[nid - start_nid] = raw_ne;
		} else {
			nat_in_journal(sum, offset) = raw_ne;
			nid_in_journal(sum, offset) = cpu_to_le32(nid);
		}

		if (nat_get_blkaddr(ne) == NULL_ADDR) {
			write_lock(&nm_i->nat_tree_lock);
			__del_from_nat_cache(nm_i, ne);
			write_unlock(&nm_i->nat_tree_lock);

			/* We can reuse this freed nid at this point */
			add_free_nid(NM_I(sbi), nid);
		} else {
			write_lock(&nm_i->nat_tree_lock);
			__clear_nat_cache_dirty(nm_i, ne);
			ne->checkpointed = true;
			write_unlock(&nm_i->nat_tree_lock);
		}
	}
	if (!flushed)
		mutex_unlock(&curseg->curseg_mutex);
	f2fs_put_page(page, 1);

	/* 2) shrink nat caches if necessary */
	try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
}

static int init_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned char *version_bitmap;
	unsigned int nat_segs, nat_blocks;

	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);

	/* segment_count_nat includes a pair of segments, so divide it by 2 */
	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
	nm_i->fcnt = 0;
	nm_i->nat_cnt = 0;

	INIT_LIST_HEAD(&nm_i->free_nid_list);
	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
	INIT_LIST_HEAD(&nm_i->nat_entries);
	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);

	mutex_init(&nm_i->build_lock);
	spin_lock_init(&nm_i->free_nid_list_lock);
	rwlock_init(&nm_i->nat_tree_lock);

	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
	nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);

	nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL);
	if (!nm_i->nat_bitmap)
		return -ENOMEM;
	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
	if (!version_bitmap)
		return -EFAULT;

	/* copy version bitmap */
	memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size);
	return 0;
}

int build_node_manager(struct f2fs_sb_info *sbi)
{
	int err;

	sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
	if (!sbi->nm_info)
		return -ENOMEM;

	err = init_node_manager(sbi);
	if (err)
		return err;

	build_free_nids(sbi);
	return 0;
}

void destroy_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next_i;
	struct nat_entry *natvec[NATVEC_SIZE];
	nid_t nid = 0;
	unsigned int found;

	if (!nm_i)
		return;

	/* destroy free nid list */
	spin_lock(&nm_i->free_nid_list_lock);
	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
		BUG_ON(i->state == NID_ALLOC);
		__del_from_free_nid_list(i);
		nm_i->fcnt--;
	}
	BUG_ON(nm_i->fcnt);
	spin_unlock(&nm_i->free_nid_list_lock);

	/* destroy nat cache */
	write_lock(&nm_i->nat_tree_lock);
	while ((found = __gang_lookup_nat_cache(nm_i,
			nid, NATVEC_SIZE, natvec))) {
		unsigned idx;
		for (idx = 0; idx < found; idx++) {
			struct nat_entry *e = natvec[idx];
			nid = nat_get_nid(e) + 1;
			__del_from_nat_cache(nm_i, e);
		}
	}
	BUG_ON(nm_i->nat_cnt);
	write_unlock(&nm_i->nat_tree_lock);

	kfree(nm_i->nat_bitmap);
	sbi->nm_info = NULL;
	kfree(nm_i);
}

int create_node_manager_caches(void)
{
	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
			sizeof(struct nat_entry), NULL);
	if (!nat_entry_slab)
		return -ENOMEM;

	free_nid_slab = f2fs_kmem_cache_create("free_nid",
			sizeof(struct free_nid), NULL);
	if (!free_nid_slab) {
		kmem_cache_destroy(nat_entry_slab);
		return -ENOMEM;
	}
	return 0;
}

void destroy_node_manager_caches(void)
{
	kmem_cache_destroy(free_nid_slab);
	kmem_cache_destroy(nat_entry_slab);
}