1 /* 2 * fs/f2fs/node.c 3 * 4 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 5 * http://www.samsung.com/ 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 #include <linux/fs.h> 12 #include <linux/f2fs_fs.h> 13 #include <linux/mpage.h> 14 #include <linux/backing-dev.h> 15 #include <linux/blkdev.h> 16 #include <linux/pagevec.h> 17 #include <linux/swap.h> 18 19 #include "f2fs.h" 20 #include "node.h" 21 #include "segment.h" 22 #include "trace.h" 23 #include <trace/events/f2fs.h> 24 25 #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) 26 27 static struct kmem_cache *nat_entry_slab; 28 static struct kmem_cache *free_nid_slab; 29 static struct kmem_cache *nat_entry_set_slab; 30 31 bool available_free_memory(struct f2fs_sb_info *sbi, int type) 32 { 33 struct f2fs_nm_info *nm_i = NM_I(sbi); 34 struct sysinfo val; 35 unsigned long avail_ram; 36 unsigned long mem_size = 0; 37 bool res = false; 38 39 si_meminfo(&val); 40 41 /* only uses low memory */ 42 avail_ram = val.totalram - val.totalhigh; 43 44 /* 45 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively 46 */ 47 if (type == FREE_NIDS) { 48 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 49 PAGE_SHIFT; 50 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 51 } else if (type == NAT_ENTRIES) { 52 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 53 PAGE_SHIFT; 54 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 55 if (excess_cached_nats(sbi)) 56 res = false; 57 } else if (type == DIRTY_DENTS) { 58 if (sbi->sb->s_bdi->wb.dirty_exceeded) 59 return false; 60 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); 61 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 62 } else if (type == INO_ENTRIES) { 63 int i; 64 65 for (i = 0; i <= UPDATE_INO; i++) 66 mem_size += (sbi->im[i].ino_num * 67 sizeof(struct ino_entry)) >> PAGE_SHIFT; 68 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 69 } else if (type == EXTENT_CACHE) { 70 mem_size = (atomic_read(&sbi->total_ext_tree) * 71 sizeof(struct extent_tree) + 72 atomic_read(&sbi->total_ext_node) * 73 sizeof(struct extent_node)) >> PAGE_SHIFT; 74 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 75 } else { 76 if (!sbi->sb->s_bdi->wb.dirty_exceeded) 77 return true; 78 } 79 return res; 80 } 81 82 static void clear_node_page_dirty(struct page *page) 83 { 84 struct address_space *mapping = page->mapping; 85 unsigned int long flags; 86 87 if (PageDirty(page)) { 88 spin_lock_irqsave(&mapping->tree_lock, flags); 89 radix_tree_tag_clear(&mapping->page_tree, 90 page_index(page), 91 PAGECACHE_TAG_DIRTY); 92 spin_unlock_irqrestore(&mapping->tree_lock, flags); 93 94 clear_page_dirty_for_io(page); 95 dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); 96 } 97 ClearPageUptodate(page); 98 } 99 100 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) 101 { 102 pgoff_t index = current_nat_addr(sbi, nid); 103 return get_meta_page(sbi, index); 104 } 105 106 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) 107 { 108 struct page *src_page; 109 struct page *dst_page; 110 pgoff_t src_off; 111 pgoff_t dst_off; 112 void *src_addr; 113 void *dst_addr; 114 struct f2fs_nm_info *nm_i = NM_I(sbi); 115 116 src_off = current_nat_addr(sbi, nid); 117 dst_off = next_nat_addr(sbi, src_off); 118 119 /* get current nat block page with lock */ 120 src_page = get_meta_page(sbi, src_off); 121 dst_page = grab_meta_page(sbi, dst_off); 122 f2fs_bug_on(sbi, PageDirty(src_page)); 123 124 src_addr = page_address(src_page); 125 dst_addr = page_address(dst_page); 126 memcpy(dst_addr, src_addr, PAGE_SIZE); 127 set_page_dirty(dst_page); 128 f2fs_put_page(src_page, 1); 129 130 set_to_next_nat(nm_i, nid); 131 132 return dst_page; 133 } 134 135 static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) 136 { 137 return radix_tree_lookup(&nm_i->nat_root, n); 138 } 139 140 static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, 141 nid_t start, unsigned int nr, struct nat_entry **ep) 142 { 143 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); 144 } 145 146 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) 147 { 148 list_del(&e->list); 149 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); 150 nm_i->nat_cnt--; 151 kmem_cache_free(nat_entry_slab, e); 152 } 153 154 static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, 155 struct nat_entry *ne) 156 { 157 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); 158 struct nat_entry_set *head; 159 160 if (get_nat_flag(ne, IS_DIRTY)) 161 return; 162 163 head = radix_tree_lookup(&nm_i->nat_set_root, set); 164 if (!head) { 165 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); 166 167 INIT_LIST_HEAD(&head->entry_list); 168 INIT_LIST_HEAD(&head->set_list); 169 head->set = set; 170 head->entry_cnt = 0; 171 f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); 172 } 173 list_move_tail(&ne->list, &head->entry_list); 174 nm_i->dirty_nat_cnt++; 175 head->entry_cnt++; 176 set_nat_flag(ne, IS_DIRTY, true); 177 } 178 179 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, 180 struct nat_entry *ne) 181 { 182 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); 183 struct nat_entry_set *head; 184 185 head = radix_tree_lookup(&nm_i->nat_set_root, set); 186 if (head) { 187 list_move_tail(&ne->list, &nm_i->nat_entries); 188 set_nat_flag(ne, IS_DIRTY, false); 189 head->entry_cnt--; 190 nm_i->dirty_nat_cnt--; 191 } 192 } 193 194 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, 195 nid_t start, unsigned int nr, struct nat_entry_set **ep) 196 { 197 return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep, 198 start, nr); 199 } 200 201 int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) 202 { 203 struct f2fs_nm_info *nm_i = NM_I(sbi); 204 struct nat_entry *e; 205 bool need = false; 206 207 down_read(&nm_i->nat_tree_lock); 208 e = __lookup_nat_cache(nm_i, nid); 209 if (e) { 210 if (!get_nat_flag(e, IS_CHECKPOINTED) && 211 !get_nat_flag(e, HAS_FSYNCED_INODE)) 212 need = true; 213 } 214 up_read(&nm_i->nat_tree_lock); 215 return need; 216 } 217 218 bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) 219 { 220 struct f2fs_nm_info *nm_i = NM_I(sbi); 221 struct nat_entry *e; 222 bool is_cp = true; 223 224 down_read(&nm_i->nat_tree_lock); 225 e = __lookup_nat_cache(nm_i, nid); 226 if (e && !get_nat_flag(e, IS_CHECKPOINTED)) 227 is_cp = false; 228 up_read(&nm_i->nat_tree_lock); 229 return is_cp; 230 } 231 232 bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) 233 { 234 struct f2fs_nm_info *nm_i = NM_I(sbi); 235 struct nat_entry *e; 236 bool need_update = true; 237 238 down_read(&nm_i->nat_tree_lock); 239 e = __lookup_nat_cache(nm_i, ino); 240 if (e && get_nat_flag(e, HAS_LAST_FSYNC) && 241 (get_nat_flag(e, IS_CHECKPOINTED) || 242 get_nat_flag(e, HAS_FSYNCED_INODE))) 243 need_update = false; 244 up_read(&nm_i->nat_tree_lock); 245 return need_update; 246 } 247 248 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 249 { 250 struct nat_entry *new; 251 252 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); 253 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); 254 memset(new, 0, sizeof(struct nat_entry)); 255 nat_set_nid(new, nid); 256 nat_reset_flag(new); 257 list_add_tail(&new->list, &nm_i->nat_entries); 258 nm_i->nat_cnt++; 259 return new; 260 } 261 262 static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, 263 struct f2fs_nat_entry *ne) 264 { 265 struct f2fs_nm_info *nm_i = NM_I(sbi); 266 struct nat_entry *e; 267 268 e = __lookup_nat_cache(nm_i, nid); 269 if (!e) { 270 e = grab_nat_entry(nm_i, nid); 271 node_info_from_raw_nat(&e->ni, ne); 272 } else { 273 f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino || 274 nat_get_blkaddr(e) != ne->block_addr || 275 nat_get_version(e) != ne->version); 276 } 277 } 278 279 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 280 block_t new_blkaddr, bool fsync_done) 281 { 282 struct f2fs_nm_info *nm_i = NM_I(sbi); 283 struct nat_entry *e; 284 285 down_write(&nm_i->nat_tree_lock); 286 e = __lookup_nat_cache(nm_i, ni->nid); 287 if (!e) { 288 e = grab_nat_entry(nm_i, ni->nid); 289 copy_node_info(&e->ni, ni); 290 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); 291 } else if (new_blkaddr == NEW_ADDR) { 292 /* 293 * when nid is reallocated, 294 * previous nat entry can be remained in nat cache. 295 * So, reinitialize it with new information. 296 */ 297 copy_node_info(&e->ni, ni); 298 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); 299 } 300 301 /* sanity check */ 302 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); 303 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR && 304 new_blkaddr == NULL_ADDR); 305 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && 306 new_blkaddr == NEW_ADDR); 307 f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && 308 nat_get_blkaddr(e) != NULL_ADDR && 309 new_blkaddr == NEW_ADDR); 310 311 /* increment version no as node is removed */ 312 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { 313 unsigned char version = nat_get_version(e); 314 nat_set_version(e, inc_node_version(version)); 315 316 /* in order to reuse the nid */ 317 if (nm_i->next_scan_nid > ni->nid) 318 nm_i->next_scan_nid = ni->nid; 319 } 320 321 /* change address */ 322 nat_set_blkaddr(e, new_blkaddr); 323 if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) 324 set_nat_flag(e, IS_CHECKPOINTED, false); 325 __set_nat_cache_dirty(nm_i, e); 326 327 /* update fsync_mark if its inode nat entry is still alive */ 328 if (ni->nid != ni->ino) 329 e = __lookup_nat_cache(nm_i, ni->ino); 330 if (e) { 331 if (fsync_done && ni->nid == ni->ino) 332 set_nat_flag(e, HAS_FSYNCED_INODE, true); 333 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); 334 } 335 up_write(&nm_i->nat_tree_lock); 336 } 337 338 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) 339 { 340 struct f2fs_nm_info *nm_i = NM_I(sbi); 341 int nr = nr_shrink; 342 343 if (!down_write_trylock(&nm_i->nat_tree_lock)) 344 return 0; 345 346 while (nr_shrink && !list_empty(&nm_i->nat_entries)) { 347 struct nat_entry *ne; 348 ne = list_first_entry(&nm_i->nat_entries, 349 struct nat_entry, list); 350 __del_from_nat_cache(nm_i, ne); 351 nr_shrink--; 352 } 353 up_write(&nm_i->nat_tree_lock); 354 return nr - nr_shrink; 355 } 356 357 /* 358 * This function always returns success 359 */ 360 void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) 361 { 362 struct f2fs_nm_info *nm_i = NM_I(sbi); 363 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 364 struct f2fs_journal *journal = curseg->journal; 365 nid_t start_nid = START_NID(nid); 366 struct f2fs_nat_block *nat_blk; 367 struct page *page = NULL; 368 struct f2fs_nat_entry ne; 369 struct nat_entry *e; 370 int i; 371 372 ni->nid = nid; 373 374 /* Check nat cache */ 375 down_read(&nm_i->nat_tree_lock); 376 e = __lookup_nat_cache(nm_i, nid); 377 if (e) { 378 ni->ino = nat_get_ino(e); 379 ni->blk_addr = nat_get_blkaddr(e); 380 ni->version = nat_get_version(e); 381 up_read(&nm_i->nat_tree_lock); 382 return; 383 } 384 385 memset(&ne, 0, sizeof(struct f2fs_nat_entry)); 386 387 /* Check current segment summary */ 388 down_read(&curseg->journal_rwsem); 389 i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); 390 if (i >= 0) { 391 ne = nat_in_journal(journal, i); 392 node_info_from_raw_nat(ni, &ne); 393 } 394 up_read(&curseg->journal_rwsem); 395 if (i >= 0) 396 goto cache; 397 398 /* Fill node_info from nat page */ 399 page = get_current_nat_page(sbi, start_nid); 400 nat_blk = (struct f2fs_nat_block *)page_address(page); 401 ne = nat_blk->entries[nid - start_nid]; 402 node_info_from_raw_nat(ni, &ne); 403 f2fs_put_page(page, 1); 404 cache: 405 up_read(&nm_i->nat_tree_lock); 406 /* cache nat entry */ 407 down_write(&nm_i->nat_tree_lock); 408 cache_nat_entry(sbi, nid, &ne); 409 up_write(&nm_i->nat_tree_lock); 410 } 411 412 /* 413 * readahead MAX_RA_NODE number of node pages. 414 */ 415 static void ra_node_pages(struct page *parent, int start, int n) 416 { 417 struct f2fs_sb_info *sbi = F2FS_P_SB(parent); 418 struct blk_plug plug; 419 int i, end; 420 nid_t nid; 421 422 blk_start_plug(&plug); 423 424 /* Then, try readahead for siblings of the desired node */ 425 end = start + n; 426 end = min(end, NIDS_PER_BLOCK); 427 for (i = start; i < end; i++) { 428 nid = get_nid(parent, i, false); 429 ra_node_page(sbi, nid); 430 } 431 432 blk_finish_plug(&plug); 433 } 434 435 pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) 436 { 437 const long direct_index = ADDRS_PER_INODE(dn->inode); 438 const long direct_blks = ADDRS_PER_BLOCK; 439 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; 440 unsigned int skipped_unit = ADDRS_PER_BLOCK; 441 int cur_level = dn->cur_level; 442 int max_level = dn->max_level; 443 pgoff_t base = 0; 444 445 if (!dn->max_level) 446 return pgofs + 1; 447 448 while (max_level-- > cur_level) 449 skipped_unit *= NIDS_PER_BLOCK; 450 451 switch (dn->max_level) { 452 case 3: 453 base += 2 * indirect_blks; 454 case 2: 455 base += 2 * direct_blks; 456 case 1: 457 base += direct_index; 458 break; 459 default: 460 f2fs_bug_on(F2FS_I_SB(dn->inode), 1); 461 } 462 463 return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base; 464 } 465 466 /* 467 * The maximum depth is four. 468 * Offset[0] will have raw inode offset. 469 */ 470 static int get_node_path(struct inode *inode, long block, 471 int offset[4], unsigned int noffset[4]) 472 { 473 const long direct_index = ADDRS_PER_INODE(inode); 474 const long direct_blks = ADDRS_PER_BLOCK; 475 const long dptrs_per_blk = NIDS_PER_BLOCK; 476 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; 477 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; 478 int n = 0; 479 int level = 0; 480 481 noffset[0] = 0; 482 483 if (block < direct_index) { 484 offset[n] = block; 485 goto got; 486 } 487 block -= direct_index; 488 if (block < direct_blks) { 489 offset[n++] = NODE_DIR1_BLOCK; 490 noffset[n] = 1; 491 offset[n] = block; 492 level = 1; 493 goto got; 494 } 495 block -= direct_blks; 496 if (block < direct_blks) { 497 offset[n++] = NODE_DIR2_BLOCK; 498 noffset[n] = 2; 499 offset[n] = block; 500 level = 1; 501 goto got; 502 } 503 block -= direct_blks; 504 if (block < indirect_blks) { 505 offset[n++] = NODE_IND1_BLOCK; 506 noffset[n] = 3; 507 offset[n++] = block / direct_blks; 508 noffset[n] = 4 + offset[n - 1]; 509 offset[n] = block % direct_blks; 510 level = 2; 511 goto got; 512 } 513 block -= indirect_blks; 514 if (block < indirect_blks) { 515 offset[n++] = NODE_IND2_BLOCK; 516 noffset[n] = 4 + dptrs_per_blk; 517 offset[n++] = block / direct_blks; 518 noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; 519 offset[n] = block % direct_blks; 520 level = 2; 521 goto got; 522 } 523 block -= indirect_blks; 524 if (block < dindirect_blks) { 525 offset[n++] = NODE_DIND_BLOCK; 526 noffset[n] = 5 + (dptrs_per_blk * 2); 527 offset[n++] = block / indirect_blks; 528 noffset[n] = 6 + (dptrs_per_blk * 2) + 529 offset[n - 1] * (dptrs_per_blk + 1); 530 offset[n++] = (block / direct_blks) % dptrs_per_blk; 531 noffset[n] = 7 + (dptrs_per_blk * 2) + 532 offset[n - 2] * (dptrs_per_blk + 1) + 533 offset[n - 1]; 534 offset[n] = block % direct_blks; 535 level = 3; 536 goto got; 537 } else { 538 BUG(); 539 } 540 got: 541 return level; 542 } 543 544 /* 545 * Caller should call f2fs_put_dnode(dn). 546 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and 547 * f2fs_unlock_op() only if ro is not set RDONLY_NODE. 548 * In the case of RDONLY_NODE, we don't need to care about mutex. 549 */ 550 int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) 551 { 552 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 553 struct page *npage[4]; 554 struct page *parent = NULL; 555 int offset[4]; 556 unsigned int noffset[4]; 557 nid_t nids[4]; 558 int level, i = 0; 559 int err = 0; 560 561 level = get_node_path(dn->inode, index, offset, noffset); 562 563 nids[0] = dn->inode->i_ino; 564 npage[0] = dn->inode_page; 565 566 if (!npage[0]) { 567 npage[0] = get_node_page(sbi, nids[0]); 568 if (IS_ERR(npage[0])) 569 return PTR_ERR(npage[0]); 570 } 571 572 /* if inline_data is set, should not report any block indices */ 573 if (f2fs_has_inline_data(dn->inode) && index) { 574 err = -ENOENT; 575 f2fs_put_page(npage[0], 1); 576 goto release_out; 577 } 578 579 parent = npage[0]; 580 if (level != 0) 581 nids[1] = get_nid(parent, offset[0], true); 582 dn->inode_page = npage[0]; 583 dn->inode_page_locked = true; 584 585 /* get indirect or direct nodes */ 586 for (i = 1; i <= level; i++) { 587 bool done = false; 588 589 if (!nids[i] && mode == ALLOC_NODE) { 590 /* alloc new node */ 591 if (!alloc_nid(sbi, &(nids[i]))) { 592 err = -ENOSPC; 593 goto release_pages; 594 } 595 596 dn->nid = nids[i]; 597 npage[i] = new_node_page(dn, noffset[i], NULL); 598 if (IS_ERR(npage[i])) { 599 alloc_nid_failed(sbi, nids[i]); 600 err = PTR_ERR(npage[i]); 601 goto release_pages; 602 } 603 604 set_nid(parent, offset[i - 1], nids[i], i == 1); 605 alloc_nid_done(sbi, nids[i]); 606 done = true; 607 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { 608 npage[i] = get_node_page_ra(parent, offset[i - 1]); 609 if (IS_ERR(npage[i])) { 610 err = PTR_ERR(npage[i]); 611 goto release_pages; 612 } 613 done = true; 614 } 615 if (i == 1) { 616 dn->inode_page_locked = false; 617 unlock_page(parent); 618 } else { 619 f2fs_put_page(parent, 1); 620 } 621 622 if (!done) { 623 npage[i] = get_node_page(sbi, nids[i]); 624 if (IS_ERR(npage[i])) { 625 err = PTR_ERR(npage[i]); 626 f2fs_put_page(npage[0], 0); 627 goto release_out; 628 } 629 } 630 if (i < level) { 631 parent = npage[i]; 632 nids[i + 1] = get_nid(parent, offset[i], false); 633 } 634 } 635 dn->nid = nids[level]; 636 dn->ofs_in_node = offset[level]; 637 dn->node_page = npage[level]; 638 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); 639 return 0; 640 641 release_pages: 642 f2fs_put_page(parent, 1); 643 if (i > 1) 644 f2fs_put_page(npage[0], 0); 645 release_out: 646 dn->inode_page = NULL; 647 dn->node_page = NULL; 648 if (err == -ENOENT) { 649 dn->cur_level = i; 650 dn->max_level = level; 651 dn->ofs_in_node = offset[level]; 652 } 653 return err; 654 } 655 656 static void truncate_node(struct dnode_of_data *dn) 657 { 658 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 659 struct node_info ni; 660 661 get_node_info(sbi, dn->nid, &ni); 662 if (dn->inode->i_blocks == 0) { 663 f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); 664 goto invalidate; 665 } 666 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); 667 668 /* Deallocate node address */ 669 invalidate_blocks(sbi, ni.blk_addr); 670 dec_valid_node_count(sbi, dn->inode); 671 set_node_addr(sbi, &ni, NULL_ADDR, false); 672 673 if (dn->nid == dn->inode->i_ino) { 674 remove_orphan_inode(sbi, dn->nid); 675 dec_valid_inode_count(sbi); 676 f2fs_inode_synced(dn->inode); 677 } 678 invalidate: 679 clear_node_page_dirty(dn->node_page); 680 set_sbi_flag(sbi, SBI_IS_DIRTY); 681 682 f2fs_put_page(dn->node_page, 1); 683 684 invalidate_mapping_pages(NODE_MAPPING(sbi), 685 dn->node_page->index, dn->node_page->index); 686 687 dn->node_page = NULL; 688 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); 689 } 690 691 static int truncate_dnode(struct dnode_of_data *dn) 692 { 693 struct page *page; 694 695 if (dn->nid == 0) 696 return 1; 697 698 /* get direct node */ 699 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); 700 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) 701 return 1; 702 else if (IS_ERR(page)) 703 return PTR_ERR(page); 704 705 /* Make dnode_of_data for parameter */ 706 dn->node_page = page; 707 dn->ofs_in_node = 0; 708 truncate_data_blocks(dn); 709 truncate_node(dn); 710 return 1; 711 } 712 713 static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, 714 int ofs, int depth) 715 { 716 struct dnode_of_data rdn = *dn; 717 struct page *page; 718 struct f2fs_node *rn; 719 nid_t child_nid; 720 unsigned int child_nofs; 721 int freed = 0; 722 int i, ret; 723 724 if (dn->nid == 0) 725 return NIDS_PER_BLOCK + 1; 726 727 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); 728 729 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); 730 if (IS_ERR(page)) { 731 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); 732 return PTR_ERR(page); 733 } 734 735 ra_node_pages(page, ofs, NIDS_PER_BLOCK); 736 737 rn = F2FS_NODE(page); 738 if (depth < 3) { 739 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { 740 child_nid = le32_to_cpu(rn->in.nid[i]); 741 if (child_nid == 0) 742 continue; 743 rdn.nid = child_nid; 744 ret = truncate_dnode(&rdn); 745 if (ret < 0) 746 goto out_err; 747 if (set_nid(page, i, 0, false)) 748 dn->node_changed = true; 749 } 750 } else { 751 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; 752 for (i = ofs; i < NIDS_PER_BLOCK; i++) { 753 child_nid = le32_to_cpu(rn->in.nid[i]); 754 if (child_nid == 0) { 755 child_nofs += NIDS_PER_BLOCK + 1; 756 continue; 757 } 758 rdn.nid = child_nid; 759 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); 760 if (ret == (NIDS_PER_BLOCK + 1)) { 761 if (set_nid(page, i, 0, false)) 762 dn->node_changed = true; 763 child_nofs += ret; 764 } else if (ret < 0 && ret != -ENOENT) { 765 goto out_err; 766 } 767 } 768 freed = child_nofs; 769 } 770 771 if (!ofs) { 772 /* remove current indirect node */ 773 dn->node_page = page; 774 truncate_node(dn); 775 freed++; 776 } else { 777 f2fs_put_page(page, 1); 778 } 779 trace_f2fs_truncate_nodes_exit(dn->inode, freed); 780 return freed; 781 782 out_err: 783 f2fs_put_page(page, 1); 784 trace_f2fs_truncate_nodes_exit(dn->inode, ret); 785 return ret; 786 } 787 788 static int truncate_partial_nodes(struct dnode_of_data *dn, 789 struct f2fs_inode *ri, int *offset, int depth) 790 { 791 struct page *pages[2]; 792 nid_t nid[3]; 793 nid_t child_nid; 794 int err = 0; 795 int i; 796 int idx = depth - 2; 797 798 nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); 799 if (!nid[0]) 800 return 0; 801 802 /* get indirect nodes in the path */ 803 for (i = 0; i < idx + 1; i++) { 804 /* reference count'll be increased */ 805 pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]); 806 if (IS_ERR(pages[i])) { 807 err = PTR_ERR(pages[i]); 808 idx = i - 1; 809 goto fail; 810 } 811 nid[i + 1] = get_nid(pages[i], offset[i + 1], false); 812 } 813 814 ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); 815 816 /* free direct nodes linked to a partial indirect node */ 817 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { 818 child_nid = get_nid(pages[idx], i, false); 819 if (!child_nid) 820 continue; 821 dn->nid = child_nid; 822 err = truncate_dnode(dn); 823 if (err < 0) 824 goto fail; 825 if (set_nid(pages[idx], i, 0, false)) 826 dn->node_changed = true; 827 } 828 829 if (offset[idx + 1] == 0) { 830 dn->node_page = pages[idx]; 831 dn->nid = nid[idx]; 832 truncate_node(dn); 833 } else { 834 f2fs_put_page(pages[idx], 1); 835 } 836 offset[idx]++; 837 offset[idx + 1] = 0; 838 idx--; 839 fail: 840 for (i = idx; i >= 0; i--) 841 f2fs_put_page(pages[i], 1); 842 843 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); 844 845 return err; 846 } 847 848 /* 849 * All the block addresses of data and nodes should be nullified. 850 */ 851 int truncate_inode_blocks(struct inode *inode, pgoff_t from) 852 { 853 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 854 int err = 0, cont = 1; 855 int level, offset[4], noffset[4]; 856 unsigned int nofs = 0; 857 struct f2fs_inode *ri; 858 struct dnode_of_data dn; 859 struct page *page; 860 861 trace_f2fs_truncate_inode_blocks_enter(inode, from); 862 863 level = get_node_path(inode, from, offset, noffset); 864 865 page = get_node_page(sbi, inode->i_ino); 866 if (IS_ERR(page)) { 867 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); 868 return PTR_ERR(page); 869 } 870 871 set_new_dnode(&dn, inode, page, NULL, 0); 872 unlock_page(page); 873 874 ri = F2FS_INODE(page); 875 switch (level) { 876 case 0: 877 case 1: 878 nofs = noffset[1]; 879 break; 880 case 2: 881 nofs = noffset[1]; 882 if (!offset[level - 1]) 883 goto skip_partial; 884 err = truncate_partial_nodes(&dn, ri, offset, level); 885 if (err < 0 && err != -ENOENT) 886 goto fail; 887 nofs += 1 + NIDS_PER_BLOCK; 888 break; 889 case 3: 890 nofs = 5 + 2 * NIDS_PER_BLOCK; 891 if (!offset[level - 1]) 892 goto skip_partial; 893 err = truncate_partial_nodes(&dn, ri, offset, level); 894 if (err < 0 && err != -ENOENT) 895 goto fail; 896 break; 897 default: 898 BUG(); 899 } 900 901 skip_partial: 902 while (cont) { 903 dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); 904 switch (offset[0]) { 905 case NODE_DIR1_BLOCK: 906 case NODE_DIR2_BLOCK: 907 err = truncate_dnode(&dn); 908 break; 909 910 case NODE_IND1_BLOCK: 911 case NODE_IND2_BLOCK: 912 err = truncate_nodes(&dn, nofs, offset[1], 2); 913 break; 914 915 case NODE_DIND_BLOCK: 916 err = truncate_nodes(&dn, nofs, offset[1], 3); 917 cont = 0; 918 break; 919 920 default: 921 BUG(); 922 } 923 if (err < 0 && err != -ENOENT) 924 goto fail; 925 if (offset[1] == 0 && 926 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { 927 lock_page(page); 928 BUG_ON(page->mapping != NODE_MAPPING(sbi)); 929 f2fs_wait_on_page_writeback(page, NODE, true); 930 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 931 set_page_dirty(page); 932 unlock_page(page); 933 } 934 offset[1] = 0; 935 offset[0]++; 936 nofs += err; 937 } 938 fail: 939 f2fs_put_page(page, 0); 940 trace_f2fs_truncate_inode_blocks_exit(inode, err); 941 return err > 0 ? 0 : err; 942 } 943 944 int truncate_xattr_node(struct inode *inode, struct page *page) 945 { 946 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 947 nid_t nid = F2FS_I(inode)->i_xattr_nid; 948 struct dnode_of_data dn; 949 struct page *npage; 950 951 if (!nid) 952 return 0; 953 954 npage = get_node_page(sbi, nid); 955 if (IS_ERR(npage)) 956 return PTR_ERR(npage); 957 958 f2fs_i_xnid_write(inode, 0); 959 960 /* need to do checkpoint during fsync */ 961 F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); 962 963 set_new_dnode(&dn, inode, page, npage, nid); 964 965 if (page) 966 dn.inode_page_locked = true; 967 truncate_node(&dn); 968 return 0; 969 } 970 971 /* 972 * Caller should grab and release a rwsem by calling f2fs_lock_op() and 973 * f2fs_unlock_op(). 974 */ 975 int remove_inode_page(struct inode *inode) 976 { 977 struct dnode_of_data dn; 978 int err; 979 980 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 981 err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); 982 if (err) 983 return err; 984 985 err = truncate_xattr_node(inode, dn.inode_page); 986 if (err) { 987 f2fs_put_dnode(&dn); 988 return err; 989 } 990 991 /* remove potential inline_data blocks */ 992 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 993 S_ISLNK(inode->i_mode)) 994 truncate_data_blocks_range(&dn, 1); 995 996 /* 0 is possible, after f2fs_new_inode() has failed */ 997 f2fs_bug_on(F2FS_I_SB(inode), 998 inode->i_blocks != 0 && inode->i_blocks != 1); 999 1000 /* will put inode & node pages */ 1001 truncate_node(&dn); 1002 return 0; 1003 } 1004 1005 struct page *new_inode_page(struct inode *inode) 1006 { 1007 struct dnode_of_data dn; 1008 1009 /* allocate inode page for new inode */ 1010 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 1011 1012 /* caller should f2fs_put_page(page, 1); */ 1013 return new_node_page(&dn, 0, NULL); 1014 } 1015 1016 struct page *new_node_page(struct dnode_of_data *dn, 1017 unsigned int ofs, struct page *ipage) 1018 { 1019 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1020 struct node_info old_ni, new_ni; 1021 struct page *page; 1022 int err; 1023 1024 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) 1025 return ERR_PTR(-EPERM); 1026 1027 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); 1028 if (!page) 1029 return ERR_PTR(-ENOMEM); 1030 1031 if (unlikely(!inc_valid_node_count(sbi, dn->inode))) { 1032 err = -ENOSPC; 1033 goto fail; 1034 } 1035 1036 get_node_info(sbi, dn->nid, &old_ni); 1037 1038 /* Reinitialize old_ni with new node page */ 1039 f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); 1040 new_ni = old_ni; 1041 new_ni.ino = dn->inode->i_ino; 1042 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 1043 1044 f2fs_wait_on_page_writeback(page, NODE, true); 1045 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); 1046 set_cold_node(dn->inode, page); 1047 if (!PageUptodate(page)) 1048 SetPageUptodate(page); 1049 if (set_page_dirty(page)) 1050 dn->node_changed = true; 1051 1052 if (f2fs_has_xattr_block(ofs)) 1053 f2fs_i_xnid_write(dn->inode, dn->nid); 1054 1055 if (ofs == 0) 1056 inc_valid_inode_count(sbi); 1057 return page; 1058 1059 fail: 1060 clear_node_page_dirty(page); 1061 f2fs_put_page(page, 1); 1062 return ERR_PTR(err); 1063 } 1064 1065 /* 1066 * Caller should do after getting the following values. 1067 * 0: f2fs_put_page(page, 0) 1068 * LOCKED_PAGE or error: f2fs_put_page(page, 1) 1069 */ 1070 static int read_node_page(struct page *page, int op_flags) 1071 { 1072 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 1073 struct node_info ni; 1074 struct f2fs_io_info fio = { 1075 .sbi = sbi, 1076 .type = NODE, 1077 .op = REQ_OP_READ, 1078 .op_flags = op_flags, 1079 .page = page, 1080 .encrypted_page = NULL, 1081 }; 1082 1083 if (PageUptodate(page)) 1084 return LOCKED_PAGE; 1085 1086 get_node_info(sbi, page->index, &ni); 1087 1088 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1089 ClearPageUptodate(page); 1090 return -ENOENT; 1091 } 1092 1093 fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; 1094 return f2fs_submit_page_bio(&fio); 1095 } 1096 1097 /* 1098 * Readahead a node page 1099 */ 1100 void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) 1101 { 1102 struct page *apage; 1103 int err; 1104 1105 if (!nid) 1106 return; 1107 f2fs_bug_on(sbi, check_nid_range(sbi, nid)); 1108 1109 rcu_read_lock(); 1110 apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid); 1111 rcu_read_unlock(); 1112 if (apage) 1113 return; 1114 1115 apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); 1116 if (!apage) 1117 return; 1118 1119 err = read_node_page(apage, REQ_RAHEAD); 1120 f2fs_put_page(apage, err ? 1 : 0); 1121 } 1122 1123 static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, 1124 struct page *parent, int start) 1125 { 1126 struct page *page; 1127 int err; 1128 1129 if (!nid) 1130 return ERR_PTR(-ENOENT); 1131 f2fs_bug_on(sbi, check_nid_range(sbi, nid)); 1132 repeat: 1133 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); 1134 if (!page) 1135 return ERR_PTR(-ENOMEM); 1136 1137 err = read_node_page(page, READ_SYNC); 1138 if (err < 0) { 1139 f2fs_put_page(page, 1); 1140 return ERR_PTR(err); 1141 } else if (err == LOCKED_PAGE) { 1142 goto page_hit; 1143 } 1144 1145 if (parent) 1146 ra_node_pages(parent, start + 1, MAX_RA_NODE); 1147 1148 lock_page(page); 1149 1150 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1151 f2fs_put_page(page, 1); 1152 goto repeat; 1153 } 1154 1155 if (unlikely(!PageUptodate(page))) 1156 goto out_err; 1157 page_hit: 1158 if(unlikely(nid != nid_of_node(page))) { 1159 f2fs_bug_on(sbi, 1); 1160 ClearPageUptodate(page); 1161 out_err: 1162 f2fs_put_page(page, 1); 1163 return ERR_PTR(-EIO); 1164 } 1165 return page; 1166 } 1167 1168 struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) 1169 { 1170 return __get_node_page(sbi, nid, NULL, 0); 1171 } 1172 1173 struct page *get_node_page_ra(struct page *parent, int start) 1174 { 1175 struct f2fs_sb_info *sbi = F2FS_P_SB(parent); 1176 nid_t nid = get_nid(parent, start, false); 1177 1178 return __get_node_page(sbi, nid, parent, start); 1179 } 1180 1181 static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) 1182 { 1183 struct inode *inode; 1184 struct page *page; 1185 int ret; 1186 1187 /* should flush inline_data before evict_inode */ 1188 inode = ilookup(sbi->sb, ino); 1189 if (!inode) 1190 return; 1191 1192 page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); 1193 if (!page) 1194 goto iput_out; 1195 1196 if (!PageUptodate(page)) 1197 goto page_out; 1198 1199 if (!PageDirty(page)) 1200 goto page_out; 1201 1202 if (!clear_page_dirty_for_io(page)) 1203 goto page_out; 1204 1205 ret = f2fs_write_inline_data(inode, page); 1206 inode_dec_dirty_pages(inode); 1207 if (ret) 1208 set_page_dirty(page); 1209 page_out: 1210 f2fs_put_page(page, 1); 1211 iput_out: 1212 iput(inode); 1213 } 1214 1215 void move_node_page(struct page *node_page, int gc_type) 1216 { 1217 if (gc_type == FG_GC) { 1218 struct f2fs_sb_info *sbi = F2FS_P_SB(node_page); 1219 struct writeback_control wbc = { 1220 .sync_mode = WB_SYNC_ALL, 1221 .nr_to_write = 1, 1222 .for_reclaim = 0, 1223 }; 1224 1225 set_page_dirty(node_page); 1226 f2fs_wait_on_page_writeback(node_page, NODE, true); 1227 1228 f2fs_bug_on(sbi, PageWriteback(node_page)); 1229 if (!clear_page_dirty_for_io(node_page)) 1230 goto out_page; 1231 1232 if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc)) 1233 unlock_page(node_page); 1234 goto release_page; 1235 } else { 1236 /* set page dirty and write it */ 1237 if (!PageWriteback(node_page)) 1238 set_page_dirty(node_page); 1239 } 1240 out_page: 1241 unlock_page(node_page); 1242 release_page: 1243 f2fs_put_page(node_page, 0); 1244 } 1245 1246 static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) 1247 { 1248 pgoff_t index, end; 1249 struct pagevec pvec; 1250 struct page *last_page = NULL; 1251 1252 pagevec_init(&pvec, 0); 1253 index = 0; 1254 end = ULONG_MAX; 1255 1256 while (index <= end) { 1257 int i, nr_pages; 1258 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1259 PAGECACHE_TAG_DIRTY, 1260 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1261 if (nr_pages == 0) 1262 break; 1263 1264 for (i = 0; i < nr_pages; i++) { 1265 struct page *page = pvec.pages[i]; 1266 1267 if (unlikely(f2fs_cp_error(sbi))) { 1268 f2fs_put_page(last_page, 0); 1269 pagevec_release(&pvec); 1270 return ERR_PTR(-EIO); 1271 } 1272 1273 if (!IS_DNODE(page) || !is_cold_node(page)) 1274 continue; 1275 if (ino_of_node(page) != ino) 1276 continue; 1277 1278 lock_page(page); 1279 1280 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1281 continue_unlock: 1282 unlock_page(page); 1283 continue; 1284 } 1285 if (ino_of_node(page) != ino) 1286 goto continue_unlock; 1287 1288 if (!PageDirty(page)) { 1289 /* someone wrote it for us */ 1290 goto continue_unlock; 1291 } 1292 1293 if (last_page) 1294 f2fs_put_page(last_page, 0); 1295 1296 get_page(page); 1297 last_page = page; 1298 unlock_page(page); 1299 } 1300 pagevec_release(&pvec); 1301 cond_resched(); 1302 } 1303 return last_page; 1304 } 1305 1306 int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, 1307 struct writeback_control *wbc, bool atomic) 1308 { 1309 pgoff_t index, end; 1310 struct pagevec pvec; 1311 int ret = 0; 1312 struct page *last_page = NULL; 1313 bool marked = false; 1314 nid_t ino = inode->i_ino; 1315 int nwritten = 0; 1316 1317 if (atomic) { 1318 last_page = last_fsync_dnode(sbi, ino); 1319 if (IS_ERR_OR_NULL(last_page)) 1320 return PTR_ERR_OR_ZERO(last_page); 1321 } 1322 retry: 1323 pagevec_init(&pvec, 0); 1324 index = 0; 1325 end = ULONG_MAX; 1326 1327 while (index <= end) { 1328 int i, nr_pages; 1329 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1330 PAGECACHE_TAG_DIRTY, 1331 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1332 if (nr_pages == 0) 1333 break; 1334 1335 for (i = 0; i < nr_pages; i++) { 1336 struct page *page = pvec.pages[i]; 1337 1338 if (unlikely(f2fs_cp_error(sbi))) { 1339 f2fs_put_page(last_page, 0); 1340 pagevec_release(&pvec); 1341 return -EIO; 1342 } 1343 1344 if (!IS_DNODE(page) || !is_cold_node(page)) 1345 continue; 1346 if (ino_of_node(page) != ino) 1347 continue; 1348 1349 lock_page(page); 1350 1351 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1352 continue_unlock: 1353 unlock_page(page); 1354 continue; 1355 } 1356 if (ino_of_node(page) != ino) 1357 goto continue_unlock; 1358 1359 if (!PageDirty(page) && page != last_page) { 1360 /* someone wrote it for us */ 1361 goto continue_unlock; 1362 } 1363 1364 f2fs_wait_on_page_writeback(page, NODE, true); 1365 BUG_ON(PageWriteback(page)); 1366 1367 if (!atomic || page == last_page) { 1368 set_fsync_mark(page, 1); 1369 if (IS_INODE(page)) { 1370 if (is_inode_flag_set(inode, 1371 FI_DIRTY_INODE)) 1372 update_inode(inode, page); 1373 set_dentry_mark(page, 1374 need_dentry_mark(sbi, ino)); 1375 } 1376 /* may be written by other thread */ 1377 if (!PageDirty(page)) 1378 set_page_dirty(page); 1379 } 1380 1381 if (!clear_page_dirty_for_io(page)) 1382 goto continue_unlock; 1383 1384 ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); 1385 if (ret) { 1386 unlock_page(page); 1387 f2fs_put_page(last_page, 0); 1388 break; 1389 } else { 1390 nwritten++; 1391 } 1392 1393 if (page == last_page) { 1394 f2fs_put_page(page, 0); 1395 marked = true; 1396 break; 1397 } 1398 } 1399 pagevec_release(&pvec); 1400 cond_resched(); 1401 1402 if (ret || marked) 1403 break; 1404 } 1405 if (!ret && atomic && !marked) { 1406 f2fs_msg(sbi->sb, KERN_DEBUG, 1407 "Retry to write fsync mark: ino=%u, idx=%lx", 1408 ino, last_page->index); 1409 lock_page(last_page); 1410 set_page_dirty(last_page); 1411 unlock_page(last_page); 1412 goto retry; 1413 } 1414 1415 if (nwritten) 1416 f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); 1417 return ret ? -EIO: 0; 1418 } 1419 1420 int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) 1421 { 1422 pgoff_t index, end; 1423 struct pagevec pvec; 1424 int step = 0; 1425 int nwritten = 0; 1426 int ret = 0; 1427 1428 pagevec_init(&pvec, 0); 1429 1430 next_step: 1431 index = 0; 1432 end = ULONG_MAX; 1433 1434 while (index <= end) { 1435 int i, nr_pages; 1436 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1437 PAGECACHE_TAG_DIRTY, 1438 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1439 if (nr_pages == 0) 1440 break; 1441 1442 for (i = 0; i < nr_pages; i++) { 1443 struct page *page = pvec.pages[i]; 1444 1445 if (unlikely(f2fs_cp_error(sbi))) { 1446 pagevec_release(&pvec); 1447 ret = -EIO; 1448 goto out; 1449 } 1450 1451 /* 1452 * flushing sequence with step: 1453 * 0. indirect nodes 1454 * 1. dentry dnodes 1455 * 2. file dnodes 1456 */ 1457 if (step == 0 && IS_DNODE(page)) 1458 continue; 1459 if (step == 1 && (!IS_DNODE(page) || 1460 is_cold_node(page))) 1461 continue; 1462 if (step == 2 && (!IS_DNODE(page) || 1463 !is_cold_node(page))) 1464 continue; 1465 lock_node: 1466 if (!trylock_page(page)) 1467 continue; 1468 1469 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1470 continue_unlock: 1471 unlock_page(page); 1472 continue; 1473 } 1474 1475 if (!PageDirty(page)) { 1476 /* someone wrote it for us */ 1477 goto continue_unlock; 1478 } 1479 1480 /* flush inline_data */ 1481 if (is_inline_node(page)) { 1482 clear_inline_node(page); 1483 unlock_page(page); 1484 flush_inline_data(sbi, ino_of_node(page)); 1485 goto lock_node; 1486 } 1487 1488 f2fs_wait_on_page_writeback(page, NODE, true); 1489 1490 BUG_ON(PageWriteback(page)); 1491 if (!clear_page_dirty_for_io(page)) 1492 goto continue_unlock; 1493 1494 set_fsync_mark(page, 0); 1495 set_dentry_mark(page, 0); 1496 1497 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) 1498 unlock_page(page); 1499 else 1500 nwritten++; 1501 1502 if (--wbc->nr_to_write == 0) 1503 break; 1504 } 1505 pagevec_release(&pvec); 1506 cond_resched(); 1507 1508 if (wbc->nr_to_write == 0) { 1509 step = 2; 1510 break; 1511 } 1512 } 1513 1514 if (step < 2) { 1515 step++; 1516 goto next_step; 1517 } 1518 out: 1519 if (nwritten) 1520 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1521 return ret; 1522 } 1523 1524 int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) 1525 { 1526 pgoff_t index = 0, end = ULONG_MAX; 1527 struct pagevec pvec; 1528 int ret2, ret = 0; 1529 1530 pagevec_init(&pvec, 0); 1531 1532 while (index <= end) { 1533 int i, nr_pages; 1534 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1535 PAGECACHE_TAG_WRITEBACK, 1536 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1537 if (nr_pages == 0) 1538 break; 1539 1540 for (i = 0; i < nr_pages; i++) { 1541 struct page *page = pvec.pages[i]; 1542 1543 /* until radix tree lookup accepts end_index */ 1544 if (unlikely(page->index > end)) 1545 continue; 1546 1547 if (ino && ino_of_node(page) == ino) { 1548 f2fs_wait_on_page_writeback(page, NODE, true); 1549 if (TestClearPageError(page)) 1550 ret = -EIO; 1551 } 1552 } 1553 pagevec_release(&pvec); 1554 cond_resched(); 1555 } 1556 1557 ret2 = filemap_check_errors(NODE_MAPPING(sbi)); 1558 if (!ret) 1559 ret = ret2; 1560 return ret; 1561 } 1562 1563 static int f2fs_write_node_page(struct page *page, 1564 struct writeback_control *wbc) 1565 { 1566 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 1567 nid_t nid; 1568 struct node_info ni; 1569 struct f2fs_io_info fio = { 1570 .sbi = sbi, 1571 .type = NODE, 1572 .op = REQ_OP_WRITE, 1573 .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, 1574 .page = page, 1575 .encrypted_page = NULL, 1576 }; 1577 1578 trace_f2fs_writepage(page, NODE); 1579 1580 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 1581 goto redirty_out; 1582 if (unlikely(f2fs_cp_error(sbi))) 1583 goto redirty_out; 1584 1585 /* get old block addr of this node page */ 1586 nid = nid_of_node(page); 1587 f2fs_bug_on(sbi, page->index != nid); 1588 1589 if (wbc->for_reclaim) { 1590 if (!down_read_trylock(&sbi->node_write)) 1591 goto redirty_out; 1592 } else { 1593 down_read(&sbi->node_write); 1594 } 1595 1596 get_node_info(sbi, nid, &ni); 1597 1598 /* This page is already truncated */ 1599 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1600 ClearPageUptodate(page); 1601 dec_page_count(sbi, F2FS_DIRTY_NODES); 1602 up_read(&sbi->node_write); 1603 unlock_page(page); 1604 return 0; 1605 } 1606 1607 set_page_writeback(page); 1608 fio.old_blkaddr = ni.blk_addr; 1609 write_node_page(nid, &fio); 1610 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); 1611 dec_page_count(sbi, F2FS_DIRTY_NODES); 1612 up_read(&sbi->node_write); 1613 1614 if (wbc->for_reclaim) 1615 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); 1616 1617 unlock_page(page); 1618 1619 if (unlikely(f2fs_cp_error(sbi))) 1620 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1621 1622 return 0; 1623 1624 redirty_out: 1625 redirty_page_for_writepage(wbc, page); 1626 return AOP_WRITEPAGE_ACTIVATE; 1627 } 1628 1629 static int f2fs_write_node_pages(struct address_space *mapping, 1630 struct writeback_control *wbc) 1631 { 1632 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 1633 struct blk_plug plug; 1634 long diff; 1635 1636 /* balancing f2fs's metadata in background */ 1637 f2fs_balance_fs_bg(sbi); 1638 1639 /* collect a number of dirty node pages and write together */ 1640 if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) 1641 goto skip_write; 1642 1643 trace_f2fs_writepages(mapping->host, wbc, NODE); 1644 1645 diff = nr_pages_to_write(sbi, NODE, wbc); 1646 wbc->sync_mode = WB_SYNC_NONE; 1647 blk_start_plug(&plug); 1648 sync_node_pages(sbi, wbc); 1649 blk_finish_plug(&plug); 1650 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); 1651 return 0; 1652 1653 skip_write: 1654 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES); 1655 trace_f2fs_writepages(mapping->host, wbc, NODE); 1656 return 0; 1657 } 1658 1659 static int f2fs_set_node_page_dirty(struct page *page) 1660 { 1661 trace_f2fs_set_page_dirty(page, NODE); 1662 1663 if (!PageUptodate(page)) 1664 SetPageUptodate(page); 1665 if (!PageDirty(page)) { 1666 f2fs_set_page_dirty_nobuffers(page); 1667 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); 1668 SetPagePrivate(page); 1669 f2fs_trace_pid(page); 1670 return 1; 1671 } 1672 return 0; 1673 } 1674 1675 /* 1676 * Structure of the f2fs node operations 1677 */ 1678 const struct address_space_operations f2fs_node_aops = { 1679 .writepage = f2fs_write_node_page, 1680 .writepages = f2fs_write_node_pages, 1681 .set_page_dirty = f2fs_set_node_page_dirty, 1682 .invalidatepage = f2fs_invalidate_page, 1683 .releasepage = f2fs_release_page, 1684 #ifdef CONFIG_MIGRATION 1685 .migratepage = f2fs_migrate_page, 1686 #endif 1687 }; 1688 1689 static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, 1690 nid_t n) 1691 { 1692 return radix_tree_lookup(&nm_i->free_nid_root, n); 1693 } 1694 1695 static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, 1696 struct free_nid *i) 1697 { 1698 list_del(&i->list); 1699 radix_tree_delete(&nm_i->free_nid_root, i->nid); 1700 } 1701 1702 static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) 1703 { 1704 struct f2fs_nm_info *nm_i = NM_I(sbi); 1705 struct free_nid *i; 1706 struct nat_entry *ne; 1707 1708 if (!available_free_memory(sbi, FREE_NIDS)) 1709 return -1; 1710 1711 /* 0 nid should not be used */ 1712 if (unlikely(nid == 0)) 1713 return 0; 1714 1715 if (build) { 1716 /* do not add allocated nids */ 1717 ne = __lookup_nat_cache(nm_i, nid); 1718 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1719 nat_get_blkaddr(ne) != NULL_ADDR)) 1720 return 0; 1721 } 1722 1723 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); 1724 i->nid = nid; 1725 i->state = NID_NEW; 1726 1727 if (radix_tree_preload(GFP_NOFS)) { 1728 kmem_cache_free(free_nid_slab, i); 1729 return 0; 1730 } 1731 1732 spin_lock(&nm_i->free_nid_list_lock); 1733 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { 1734 spin_unlock(&nm_i->free_nid_list_lock); 1735 radix_tree_preload_end(); 1736 kmem_cache_free(free_nid_slab, i); 1737 return 0; 1738 } 1739 list_add_tail(&i->list, &nm_i->free_nid_list); 1740 nm_i->fcnt++; 1741 spin_unlock(&nm_i->free_nid_list_lock); 1742 radix_tree_preload_end(); 1743 return 1; 1744 } 1745 1746 static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) 1747 { 1748 struct free_nid *i; 1749 bool need_free = false; 1750 1751 spin_lock(&nm_i->free_nid_list_lock); 1752 i = __lookup_free_nid_list(nm_i, nid); 1753 if (i && i->state == NID_NEW) { 1754 __del_from_free_nid_list(nm_i, i); 1755 nm_i->fcnt--; 1756 need_free = true; 1757 } 1758 spin_unlock(&nm_i->free_nid_list_lock); 1759 1760 if (need_free) 1761 kmem_cache_free(free_nid_slab, i); 1762 } 1763 1764 static void scan_nat_page(struct f2fs_sb_info *sbi, 1765 struct page *nat_page, nid_t start_nid) 1766 { 1767 struct f2fs_nm_info *nm_i = NM_I(sbi); 1768 struct f2fs_nat_block *nat_blk = page_address(nat_page); 1769 block_t blk_addr; 1770 int i; 1771 1772 i = start_nid % NAT_ENTRY_PER_BLOCK; 1773 1774 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { 1775 1776 if (unlikely(start_nid >= nm_i->max_nid)) 1777 break; 1778 1779 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1780 f2fs_bug_on(sbi, blk_addr == NEW_ADDR); 1781 if (blk_addr == NULL_ADDR) { 1782 if (add_free_nid(sbi, start_nid, true) < 0) 1783 break; 1784 } 1785 } 1786 } 1787 1788 void build_free_nids(struct f2fs_sb_info *sbi) 1789 { 1790 struct f2fs_nm_info *nm_i = NM_I(sbi); 1791 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1792 struct f2fs_journal *journal = curseg->journal; 1793 int i = 0; 1794 nid_t nid = nm_i->next_scan_nid; 1795 1796 /* Enough entries */ 1797 if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK) 1798 return; 1799 1800 /* readahead nat pages to be scanned */ 1801 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, 1802 META_NAT, true); 1803 1804 down_read(&nm_i->nat_tree_lock); 1805 1806 while (1) { 1807 struct page *page = get_current_nat_page(sbi, nid); 1808 1809 scan_nat_page(sbi, page, nid); 1810 f2fs_put_page(page, 1); 1811 1812 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); 1813 if (unlikely(nid >= nm_i->max_nid)) 1814 nid = 0; 1815 1816 if (++i >= FREE_NID_PAGES) 1817 break; 1818 } 1819 1820 /* go to the next free nat pages to find free nids abundantly */ 1821 nm_i->next_scan_nid = nid; 1822 1823 /* find free nids from current sum_pages */ 1824 down_read(&curseg->journal_rwsem); 1825 for (i = 0; i < nats_in_cursum(journal); i++) { 1826 block_t addr; 1827 1828 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); 1829 nid = le32_to_cpu(nid_in_journal(journal, i)); 1830 if (addr == NULL_ADDR) 1831 add_free_nid(sbi, nid, true); 1832 else 1833 remove_free_nid(nm_i, nid); 1834 } 1835 up_read(&curseg->journal_rwsem); 1836 up_read(&nm_i->nat_tree_lock); 1837 1838 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), 1839 nm_i->ra_nid_pages, META_NAT, false); 1840 } 1841 1842 /* 1843 * If this function returns success, caller can obtain a new nid 1844 * from second parameter of this function. 1845 * The returned nid could be used ino as well as nid when inode is created. 1846 */ 1847 bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) 1848 { 1849 struct f2fs_nm_info *nm_i = NM_I(sbi); 1850 struct free_nid *i = NULL; 1851 retry: 1852 #ifdef CONFIG_F2FS_FAULT_INJECTION 1853 if (time_to_inject(sbi, FAULT_ALLOC_NID)) 1854 return false; 1855 #endif 1856 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) 1857 return false; 1858 1859 spin_lock(&nm_i->free_nid_list_lock); 1860 1861 /* We should not use stale free nids created by build_free_nids */ 1862 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1863 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); 1864 list_for_each_entry(i, &nm_i->free_nid_list, list) 1865 if (i->state == NID_NEW) 1866 break; 1867 1868 f2fs_bug_on(sbi, i->state != NID_NEW); 1869 *nid = i->nid; 1870 i->state = NID_ALLOC; 1871 nm_i->fcnt--; 1872 spin_unlock(&nm_i->free_nid_list_lock); 1873 return true; 1874 } 1875 spin_unlock(&nm_i->free_nid_list_lock); 1876 1877 /* Let's scan nat pages and its caches to get free nids */ 1878 mutex_lock(&nm_i->build_lock); 1879 build_free_nids(sbi); 1880 mutex_unlock(&nm_i->build_lock); 1881 goto retry; 1882 } 1883 1884 /* 1885 * alloc_nid() should be called prior to this function. 1886 */ 1887 void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) 1888 { 1889 struct f2fs_nm_info *nm_i = NM_I(sbi); 1890 struct free_nid *i; 1891 1892 spin_lock(&nm_i->free_nid_list_lock); 1893 i = __lookup_free_nid_list(nm_i, nid); 1894 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); 1895 __del_from_free_nid_list(nm_i, i); 1896 spin_unlock(&nm_i->free_nid_list_lock); 1897 1898 kmem_cache_free(free_nid_slab, i); 1899 } 1900 1901 /* 1902 * alloc_nid() should be called prior to this function. 1903 */ 1904 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) 1905 { 1906 struct f2fs_nm_info *nm_i = NM_I(sbi); 1907 struct free_nid *i; 1908 bool need_free = false; 1909 1910 if (!nid) 1911 return; 1912 1913 spin_lock(&nm_i->free_nid_list_lock); 1914 i = __lookup_free_nid_list(nm_i, nid); 1915 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); 1916 if (!available_free_memory(sbi, FREE_NIDS)) { 1917 __del_from_free_nid_list(nm_i, i); 1918 need_free = true; 1919 } else { 1920 i->state = NID_NEW; 1921 nm_i->fcnt++; 1922 } 1923 spin_unlock(&nm_i->free_nid_list_lock); 1924 1925 if (need_free) 1926 kmem_cache_free(free_nid_slab, i); 1927 } 1928 1929 int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) 1930 { 1931 struct f2fs_nm_info *nm_i = NM_I(sbi); 1932 struct free_nid *i, *next; 1933 int nr = nr_shrink; 1934 1935 if (nm_i->fcnt <= MAX_FREE_NIDS) 1936 return 0; 1937 1938 if (!mutex_trylock(&nm_i->build_lock)) 1939 return 0; 1940 1941 spin_lock(&nm_i->free_nid_list_lock); 1942 list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { 1943 if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS) 1944 break; 1945 if (i->state == NID_ALLOC) 1946 continue; 1947 __del_from_free_nid_list(nm_i, i); 1948 kmem_cache_free(free_nid_slab, i); 1949 nm_i->fcnt--; 1950 nr_shrink--; 1951 } 1952 spin_unlock(&nm_i->free_nid_list_lock); 1953 mutex_unlock(&nm_i->build_lock); 1954 1955 return nr - nr_shrink; 1956 } 1957 1958 void recover_inline_xattr(struct inode *inode, struct page *page) 1959 { 1960 void *src_addr, *dst_addr; 1961 size_t inline_size; 1962 struct page *ipage; 1963 struct f2fs_inode *ri; 1964 1965 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); 1966 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); 1967 1968 ri = F2FS_INODE(page); 1969 if (!(ri->i_inline & F2FS_INLINE_XATTR)) { 1970 clear_inode_flag(inode, FI_INLINE_XATTR); 1971 goto update_inode; 1972 } 1973 1974 dst_addr = inline_xattr_addr(ipage); 1975 src_addr = inline_xattr_addr(page); 1976 inline_size = inline_xattr_size(inode); 1977 1978 f2fs_wait_on_page_writeback(ipage, NODE, true); 1979 memcpy(dst_addr, src_addr, inline_size); 1980 update_inode: 1981 update_inode(inode, ipage); 1982 f2fs_put_page(ipage, 1); 1983 } 1984 1985 void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 1986 { 1987 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1988 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 1989 nid_t new_xnid = nid_of_node(page); 1990 struct node_info ni; 1991 1992 /* 1: invalidate the previous xattr nid */ 1993 if (!prev_xnid) 1994 goto recover_xnid; 1995 1996 /* Deallocate node address */ 1997 get_node_info(sbi, prev_xnid, &ni); 1998 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); 1999 invalidate_blocks(sbi, ni.blk_addr); 2000 dec_valid_node_count(sbi, inode); 2001 set_node_addr(sbi, &ni, NULL_ADDR, false); 2002 2003 recover_xnid: 2004 /* 2: allocate new xattr nid */ 2005 if (unlikely(!inc_valid_node_count(sbi, inode))) 2006 f2fs_bug_on(sbi, 1); 2007 2008 remove_free_nid(NM_I(sbi), new_xnid); 2009 get_node_info(sbi, new_xnid, &ni); 2010 ni.ino = inode->i_ino; 2011 set_node_addr(sbi, &ni, NEW_ADDR, false); 2012 f2fs_i_xnid_write(inode, new_xnid); 2013 2014 /* 3: update xattr blkaddr */ 2015 refresh_sit_entry(sbi, NEW_ADDR, blkaddr); 2016 set_node_addr(sbi, &ni, blkaddr, false); 2017 } 2018 2019 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 2020 { 2021 struct f2fs_inode *src, *dst; 2022 nid_t ino = ino_of_node(page); 2023 struct node_info old_ni, new_ni; 2024 struct page *ipage; 2025 2026 get_node_info(sbi, ino, &old_ni); 2027 2028 if (unlikely(old_ni.blk_addr != NULL_ADDR)) 2029 return -EINVAL; 2030 retry: 2031 ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); 2032 if (!ipage) { 2033 congestion_wait(BLK_RW_ASYNC, HZ/50); 2034 goto retry; 2035 } 2036 2037 /* Should not use this inode from free nid list */ 2038 remove_free_nid(NM_I(sbi), ino); 2039 2040 if (!PageUptodate(ipage)) 2041 SetPageUptodate(ipage); 2042 fill_node_footer(ipage, ino, ino, 0, true); 2043 2044 src = F2FS_INODE(page); 2045 dst = F2FS_INODE(ipage); 2046 2047 memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src); 2048 dst->i_size = 0; 2049 dst->i_blocks = cpu_to_le64(1); 2050 dst->i_links = cpu_to_le32(1); 2051 dst->i_xattr_nid = 0; 2052 dst->i_inline = src->i_inline & F2FS_INLINE_XATTR; 2053 2054 new_ni = old_ni; 2055 new_ni.ino = ino; 2056 2057 if (unlikely(!inc_valid_node_count(sbi, NULL))) 2058 WARN_ON(1); 2059 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 2060 inc_valid_inode_count(sbi); 2061 set_page_dirty(ipage); 2062 f2fs_put_page(ipage, 1); 2063 return 0; 2064 } 2065 2066 int restore_node_summary(struct f2fs_sb_info *sbi, 2067 unsigned int segno, struct f2fs_summary_block *sum) 2068 { 2069 struct f2fs_node *rn; 2070 struct f2fs_summary *sum_entry; 2071 block_t addr; 2072 int bio_blocks = MAX_BIO_BLOCKS(sbi); 2073 int i, idx, last_offset, nrpages; 2074 2075 /* scan the node segment */ 2076 last_offset = sbi->blocks_per_seg; 2077 addr = START_BLOCK(sbi, segno); 2078 sum_entry = &sum->entries[0]; 2079 2080 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 2081 nrpages = min(last_offset - i, bio_blocks); 2082 2083 /* readahead node pages */ 2084 ra_meta_pages(sbi, addr, nrpages, META_POR, true); 2085 2086 for (idx = addr; idx < addr + nrpages; idx++) { 2087 struct page *page = get_tmp_page(sbi, idx); 2088 2089 rn = F2FS_NODE(page); 2090 sum_entry->nid = rn->footer.nid; 2091 sum_entry->version = 0; 2092 sum_entry->ofs_in_node = 0; 2093 sum_entry++; 2094 f2fs_put_page(page, 1); 2095 } 2096 2097 invalidate_mapping_pages(META_MAPPING(sbi), addr, 2098 addr + nrpages); 2099 } 2100 return 0; 2101 } 2102 2103 static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 2104 { 2105 struct f2fs_nm_info *nm_i = NM_I(sbi); 2106 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2107 struct f2fs_journal *journal = curseg->journal; 2108 int i; 2109 2110 down_write(&curseg->journal_rwsem); 2111 for (i = 0; i < nats_in_cursum(journal); i++) { 2112 struct nat_entry *ne; 2113 struct f2fs_nat_entry raw_ne; 2114 nid_t nid = le32_to_cpu(nid_in_journal(journal, i)); 2115 2116 raw_ne = nat_in_journal(journal, i); 2117 2118 ne = __lookup_nat_cache(nm_i, nid); 2119 if (!ne) { 2120 ne = grab_nat_entry(nm_i, nid); 2121 node_info_from_raw_nat(&ne->ni, &raw_ne); 2122 } 2123 __set_nat_cache_dirty(nm_i, ne); 2124 } 2125 update_nats_in_cursum(journal, -i); 2126 up_write(&curseg->journal_rwsem); 2127 } 2128 2129 static void __adjust_nat_entry_set(struct nat_entry_set *nes, 2130 struct list_head *head, int max) 2131 { 2132 struct nat_entry_set *cur; 2133 2134 if (nes->entry_cnt >= max) 2135 goto add_out; 2136 2137 list_for_each_entry(cur, head, set_list) { 2138 if (cur->entry_cnt >= nes->entry_cnt) { 2139 list_add(&nes->set_list, cur->set_list.prev); 2140 return; 2141 } 2142 } 2143 add_out: 2144 list_add_tail(&nes->set_list, head); 2145 } 2146 2147 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, 2148 struct nat_entry_set *set) 2149 { 2150 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2151 struct f2fs_journal *journal = curseg->journal; 2152 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; 2153 bool to_journal = true; 2154 struct f2fs_nat_block *nat_blk; 2155 struct nat_entry *ne, *cur; 2156 struct page *page = NULL; 2157 2158 /* 2159 * there are two steps to flush nat entries: 2160 * #1, flush nat entries to journal in current hot data summary block. 2161 * #2, flush nat entries to nat page. 2162 */ 2163 if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) 2164 to_journal = false; 2165 2166 if (to_journal) { 2167 down_write(&curseg->journal_rwsem); 2168 } else { 2169 page = get_next_nat_page(sbi, start_nid); 2170 nat_blk = page_address(page); 2171 f2fs_bug_on(sbi, !nat_blk); 2172 } 2173 2174 /* flush dirty nats in nat entry set */ 2175 list_for_each_entry_safe(ne, cur, &set->entry_list, list) { 2176 struct f2fs_nat_entry *raw_ne; 2177 nid_t nid = nat_get_nid(ne); 2178 int offset; 2179 2180 if (nat_get_blkaddr(ne) == NEW_ADDR) 2181 continue; 2182 2183 if (to_journal) { 2184 offset = lookup_journal_in_cursum(journal, 2185 NAT_JOURNAL, nid, 1); 2186 f2fs_bug_on(sbi, offset < 0); 2187 raw_ne = &nat_in_journal(journal, offset); 2188 nid_in_journal(journal, offset) = cpu_to_le32(nid); 2189 } else { 2190 raw_ne = &nat_blk->entries[nid - start_nid]; 2191 } 2192 raw_nat_from_node_info(raw_ne, &ne->ni); 2193 nat_reset_flag(ne); 2194 __clear_nat_cache_dirty(NM_I(sbi), ne); 2195 if (nat_get_blkaddr(ne) == NULL_ADDR) 2196 add_free_nid(sbi, nid, false); 2197 } 2198 2199 if (to_journal) 2200 up_write(&curseg->journal_rwsem); 2201 else 2202 f2fs_put_page(page, 1); 2203 2204 f2fs_bug_on(sbi, set->entry_cnt); 2205 2206 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 2207 kmem_cache_free(nat_entry_set_slab, set); 2208 } 2209 2210 /* 2211 * This function is called during the checkpointing process. 2212 */ 2213 void flush_nat_entries(struct f2fs_sb_info *sbi) 2214 { 2215 struct f2fs_nm_info *nm_i = NM_I(sbi); 2216 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2217 struct f2fs_journal *journal = curseg->journal; 2218 struct nat_entry_set *setvec[SETVEC_SIZE]; 2219 struct nat_entry_set *set, *tmp; 2220 unsigned int found; 2221 nid_t set_idx = 0; 2222 LIST_HEAD(sets); 2223 2224 if (!nm_i->dirty_nat_cnt) 2225 return; 2226 2227 down_write(&nm_i->nat_tree_lock); 2228 2229 /* 2230 * if there are no enough space in journal to store dirty nat 2231 * entries, remove all entries from journal and merge them 2232 * into nat entry set. 2233 */ 2234 if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 2235 remove_nats_in_journal(sbi); 2236 2237 while ((found = __gang_lookup_nat_set(nm_i, 2238 set_idx, SETVEC_SIZE, setvec))) { 2239 unsigned idx; 2240 set_idx = setvec[found - 1]->set + 1; 2241 for (idx = 0; idx < found; idx++) 2242 __adjust_nat_entry_set(setvec[idx], &sets, 2243 MAX_NAT_JENTRIES(journal)); 2244 } 2245 2246 /* flush dirty nats in nat entry set */ 2247 list_for_each_entry_safe(set, tmp, &sets, set_list) 2248 __flush_nat_entry_set(sbi, set); 2249 2250 up_write(&nm_i->nat_tree_lock); 2251 2252 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 2253 } 2254 2255 static int init_node_manager(struct f2fs_sb_info *sbi) 2256 { 2257 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); 2258 struct f2fs_nm_info *nm_i = NM_I(sbi); 2259 unsigned char *version_bitmap; 2260 unsigned int nat_segs, nat_blocks; 2261 2262 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); 2263 2264 /* segment_count_nat includes pair segment so divide to 2. */ 2265 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 2266 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 2267 2268 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 2269 2270 /* not used nids: 0, node, meta, (and root counted as valid node) */ 2271 nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; 2272 nm_i->fcnt = 0; 2273 nm_i->nat_cnt = 0; 2274 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 2275 nm_i->ra_nid_pages = DEF_RA_NID_PAGES; 2276 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; 2277 2278 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 2279 INIT_LIST_HEAD(&nm_i->free_nid_list); 2280 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); 2281 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); 2282 INIT_LIST_HEAD(&nm_i->nat_entries); 2283 2284 mutex_init(&nm_i->build_lock); 2285 spin_lock_init(&nm_i->free_nid_list_lock); 2286 init_rwsem(&nm_i->nat_tree_lock); 2287 2288 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 2289 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); 2290 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); 2291 if (!version_bitmap) 2292 return -EFAULT; 2293 2294 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, 2295 GFP_KERNEL); 2296 if (!nm_i->nat_bitmap) 2297 return -ENOMEM; 2298 return 0; 2299 } 2300 2301 int build_node_manager(struct f2fs_sb_info *sbi) 2302 { 2303 int err; 2304 2305 sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); 2306 if (!sbi->nm_info) 2307 return -ENOMEM; 2308 2309 err = init_node_manager(sbi); 2310 if (err) 2311 return err; 2312 2313 build_free_nids(sbi); 2314 return 0; 2315 } 2316 2317 void destroy_node_manager(struct f2fs_sb_info *sbi) 2318 { 2319 struct f2fs_nm_info *nm_i = NM_I(sbi); 2320 struct free_nid *i, *next_i; 2321 struct nat_entry *natvec[NATVEC_SIZE]; 2322 struct nat_entry_set *setvec[SETVEC_SIZE]; 2323 nid_t nid = 0; 2324 unsigned int found; 2325 2326 if (!nm_i) 2327 return; 2328 2329 /* destroy free nid list */ 2330 spin_lock(&nm_i->free_nid_list_lock); 2331 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 2332 f2fs_bug_on(sbi, i->state == NID_ALLOC); 2333 __del_from_free_nid_list(nm_i, i); 2334 nm_i->fcnt--; 2335 spin_unlock(&nm_i->free_nid_list_lock); 2336 kmem_cache_free(free_nid_slab, i); 2337 spin_lock(&nm_i->free_nid_list_lock); 2338 } 2339 f2fs_bug_on(sbi, nm_i->fcnt); 2340 spin_unlock(&nm_i->free_nid_list_lock); 2341 2342 /* destroy nat cache */ 2343 down_write(&nm_i->nat_tree_lock); 2344 while ((found = __gang_lookup_nat_cache(nm_i, 2345 nid, NATVEC_SIZE, natvec))) { 2346 unsigned idx; 2347 2348 nid = nat_get_nid(natvec[found - 1]) + 1; 2349 for (idx = 0; idx < found; idx++) 2350 __del_from_nat_cache(nm_i, natvec[idx]); 2351 } 2352 f2fs_bug_on(sbi, nm_i->nat_cnt); 2353 2354 /* destroy nat set cache */ 2355 nid = 0; 2356 while ((found = __gang_lookup_nat_set(nm_i, 2357 nid, SETVEC_SIZE, setvec))) { 2358 unsigned idx; 2359 2360 nid = setvec[found - 1]->set + 1; 2361 for (idx = 0; idx < found; idx++) { 2362 /* entry_cnt is not zero, when cp_error was occurred */ 2363 f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list)); 2364 radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set); 2365 kmem_cache_free(nat_entry_set_slab, setvec[idx]); 2366 } 2367 } 2368 up_write(&nm_i->nat_tree_lock); 2369 2370 kfree(nm_i->nat_bitmap); 2371 sbi->nm_info = NULL; 2372 kfree(nm_i); 2373 } 2374 2375 int __init create_node_manager_caches(void) 2376 { 2377 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2378 sizeof(struct nat_entry)); 2379 if (!nat_entry_slab) 2380 goto fail; 2381 2382 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2383 sizeof(struct free_nid)); 2384 if (!free_nid_slab) 2385 goto destroy_nat_entry; 2386 2387 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", 2388 sizeof(struct nat_entry_set)); 2389 if (!nat_entry_set_slab) 2390 goto destroy_free_nid; 2391 return 0; 2392 2393 destroy_free_nid: 2394 kmem_cache_destroy(free_nid_slab); 2395 destroy_nat_entry: 2396 kmem_cache_destroy(nat_entry_slab); 2397 fail: 2398 return -ENOMEM; 2399 } 2400 2401 void destroy_node_manager_caches(void) 2402 { 2403 kmem_cache_destroy(nat_entry_set_slab); 2404 kmem_cache_destroy(free_nid_slab); 2405 kmem_cache_destroy(nat_entry_slab); 2406 } 2407