1 /* 2 * fs/f2fs/data.c 3 * 4 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 5 * http://www.samsung.com/ 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 #include <linux/fs.h> 12 #include <linux/f2fs_fs.h> 13 #include <linux/buffer_head.h> 14 #include <linux/mpage.h> 15 #include <linux/writeback.h> 16 #include <linux/backing-dev.h> 17 #include <linux/blkdev.h> 18 #include <linux/bio.h> 19 #include <linux/prefetch.h> 20 #include <linux/uio.h> 21 22 #include "f2fs.h" 23 #include "node.h" 24 #include "segment.h" 25 #include "trace.h" 26 #include <trace/events/f2fs.h> 27 28 static struct kmem_cache *extent_tree_slab; 29 static struct kmem_cache *extent_node_slab; 30 31 static void f2fs_read_end_io(struct bio *bio, int err) 32 { 33 struct bio_vec *bvec; 34 int i; 35 36 bio_for_each_segment_all(bvec, bio, i) { 37 struct page *page = bvec->bv_page; 38 39 if (!err) { 40 SetPageUptodate(page); 41 } else { 42 ClearPageUptodate(page); 43 SetPageError(page); 44 } 45 unlock_page(page); 46 } 47 bio_put(bio); 48 } 49 50 static void f2fs_write_end_io(struct bio *bio, int err) 51 { 52 struct f2fs_sb_info *sbi = bio->bi_private; 53 struct bio_vec *bvec; 54 int i; 55 56 bio_for_each_segment_all(bvec, bio, i) { 57 struct page *page = bvec->bv_page; 58 59 if (unlikely(err)) { 60 set_page_dirty(page); 61 set_bit(AS_EIO, &page->mapping->flags); 62 f2fs_stop_checkpoint(sbi); 63 } 64 end_page_writeback(page); 65 dec_page_count(sbi, F2FS_WRITEBACK); 66 } 67 68 if (!get_pages(sbi, F2FS_WRITEBACK) && 69 !list_empty(&sbi->cp_wait.task_list)) 70 wake_up(&sbi->cp_wait); 71 72 bio_put(bio); 73 } 74 75 /* 76 * Low-level block read/write IO operations. 77 */ 78 static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, 79 int npages, bool is_read) 80 { 81 struct bio *bio; 82 83 /* No failure on bio allocation */ 84 bio = bio_alloc(GFP_NOIO, npages); 85 86 bio->bi_bdev = sbi->sb->s_bdev; 87 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); 88 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 89 bio->bi_private = sbi; 90 91 return bio; 92 } 93 94 static void __submit_merged_bio(struct f2fs_bio_info *io) 95 { 96 struct f2fs_io_info *fio = &io->fio; 97 98 if (!io->bio) 99 return; 100 101 if (is_read_io(fio->rw)) 102 trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); 103 else 104 trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); 105 106 submit_bio(fio->rw, io->bio); 107 io->bio = NULL; 108 } 109 110 void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, 111 enum page_type type, int rw) 112 { 113 enum page_type btype = PAGE_TYPE_OF_BIO(type); 114 struct f2fs_bio_info *io; 115 116 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; 117 118 down_write(&io->io_rwsem); 119 120 /* change META to META_FLUSH in the checkpoint procedure */ 121 if (type >= META_FLUSH) { 122 io->fio.type = META_FLUSH; 123 if (test_opt(sbi, NOBARRIER)) 124 io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO; 125 else 126 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 127 } 128 __submit_merged_bio(io); 129 up_write(&io->io_rwsem); 130 } 131 132 /* 133 * Fill the locked page with data located in the block address. 134 * Return unlocked page. 135 */ 136 int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, 137 struct f2fs_io_info *fio) 138 { 139 struct bio *bio; 140 141 trace_f2fs_submit_page_bio(page, fio); 142 f2fs_trace_ios(page, fio, 0); 143 144 /* Allocate a new bio */ 145 bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); 146 147 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { 148 bio_put(bio); 149 f2fs_put_page(page, 1); 150 return -EFAULT; 151 } 152 153 submit_bio(fio->rw, bio); 154 return 0; 155 } 156 157 void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, 158 struct f2fs_io_info *fio) 159 { 160 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); 161 struct f2fs_bio_info *io; 162 bool is_read = is_read_io(fio->rw); 163 164 io = is_read ? &sbi->read_io : &sbi->write_io[btype]; 165 166 verify_block_addr(sbi, fio->blk_addr); 167 168 down_write(&io->io_rwsem); 169 170 if (!is_read) 171 inc_page_count(sbi, F2FS_WRITEBACK); 172 173 if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 || 174 io->fio.rw != fio->rw)) 175 __submit_merged_bio(io); 176 alloc_new: 177 if (io->bio == NULL) { 178 int bio_blocks = MAX_BIO_BLOCKS(sbi); 179 180 io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read); 181 io->fio = *fio; 182 } 183 184 if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < 185 PAGE_CACHE_SIZE) { 186 __submit_merged_bio(io); 187 goto alloc_new; 188 } 189 190 io->last_block_in_bio = fio->blk_addr; 191 f2fs_trace_ios(page, fio, 0); 192 193 up_write(&io->io_rwsem); 194 trace_f2fs_submit_page_mbio(page, fio); 195 } 196 197 /* 198 * Lock ordering for the change of data block address: 199 * ->data_page 200 * ->node_page 201 * update block addresses in the node page 202 */ 203 void set_data_blkaddr(struct dnode_of_data *dn) 204 { 205 struct f2fs_node *rn; 206 __le32 *addr_array; 207 struct page *node_page = dn->node_page; 208 unsigned int ofs_in_node = dn->ofs_in_node; 209 210 f2fs_wait_on_page_writeback(node_page, NODE); 211 212 rn = F2FS_NODE(node_page); 213 214 /* Get physical address of data block */ 215 addr_array = blkaddr_in_node(rn); 216 addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); 217 set_page_dirty(node_page); 218 } 219 220 int reserve_new_block(struct dnode_of_data *dn) 221 { 222 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 223 224 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 225 return -EPERM; 226 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 227 return -ENOSPC; 228 229 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 230 231 dn->data_blkaddr = NEW_ADDR; 232 set_data_blkaddr(dn); 233 mark_inode_dirty(dn->inode); 234 sync_inode_page(dn); 235 return 0; 236 } 237 238 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) 239 { 240 bool need_put = dn->inode_page ? false : true; 241 int err; 242 243 err = get_dnode_of_data(dn, index, ALLOC_NODE); 244 if (err) 245 return err; 246 247 if (dn->data_blkaddr == NULL_ADDR) 248 err = reserve_new_block(dn); 249 if (err || need_put) 250 f2fs_put_dnode(dn); 251 return err; 252 } 253 254 static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, 255 struct extent_info *ei, struct buffer_head *bh_result) 256 { 257 unsigned int blkbits = sb->s_blocksize_bits; 258 size_t max_size = bh_result->b_size; 259 size_t mapped_size; 260 261 clear_buffer_new(bh_result); 262 map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); 263 mapped_size = (ei->fofs + ei->len - pgofs) << blkbits; 264 bh_result->b_size = min(max_size, mapped_size); 265 } 266 267 static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, 268 struct extent_info *ei) 269 { 270 struct f2fs_inode_info *fi = F2FS_I(inode); 271 pgoff_t start_fofs, end_fofs; 272 block_t start_blkaddr; 273 274 read_lock(&fi->ext_lock); 275 if (fi->ext.len == 0) { 276 read_unlock(&fi->ext_lock); 277 return false; 278 } 279 280 stat_inc_total_hit(inode->i_sb); 281 282 start_fofs = fi->ext.fofs; 283 end_fofs = fi->ext.fofs + fi->ext.len - 1; 284 start_blkaddr = fi->ext.blk; 285 286 if (pgofs >= start_fofs && pgofs <= end_fofs) { 287 *ei = fi->ext; 288 stat_inc_read_hit(inode->i_sb); 289 read_unlock(&fi->ext_lock); 290 return true; 291 } 292 read_unlock(&fi->ext_lock); 293 return false; 294 } 295 296 static bool update_extent_info(struct inode *inode, pgoff_t fofs, 297 block_t blkaddr) 298 { 299 struct f2fs_inode_info *fi = F2FS_I(inode); 300 pgoff_t start_fofs, end_fofs; 301 block_t start_blkaddr, end_blkaddr; 302 int need_update = true; 303 304 write_lock(&fi->ext_lock); 305 306 start_fofs = fi->ext.fofs; 307 end_fofs = fi->ext.fofs + fi->ext.len - 1; 308 start_blkaddr = fi->ext.blk; 309 end_blkaddr = fi->ext.blk + fi->ext.len - 1; 310 311 /* Drop and initialize the matched extent */ 312 if (fi->ext.len == 1 && fofs == start_fofs) 313 fi->ext.len = 0; 314 315 /* Initial extent */ 316 if (fi->ext.len == 0) { 317 if (blkaddr != NULL_ADDR) { 318 fi->ext.fofs = fofs; 319 fi->ext.blk = blkaddr; 320 fi->ext.len = 1; 321 } 322 goto end_update; 323 } 324 325 /* Front merge */ 326 if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) { 327 fi->ext.fofs--; 328 fi->ext.blk--; 329 fi->ext.len++; 330 goto end_update; 331 } 332 333 /* Back merge */ 334 if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) { 335 fi->ext.len++; 336 goto end_update; 337 } 338 339 /* Split the existing extent */ 340 if (fi->ext.len > 1 && 341 fofs >= start_fofs && fofs <= end_fofs) { 342 if ((end_fofs - fofs) < (fi->ext.len >> 1)) { 343 fi->ext.len = fofs - start_fofs; 344 } else { 345 fi->ext.fofs = fofs + 1; 346 fi->ext.blk = start_blkaddr + fofs - start_fofs + 1; 347 fi->ext.len -= fofs - start_fofs + 1; 348 } 349 } else { 350 need_update = false; 351 } 352 353 /* Finally, if the extent is very fragmented, let's drop the cache. */ 354 if (fi->ext.len < F2FS_MIN_EXTENT_LEN) { 355 fi->ext.len = 0; 356 set_inode_flag(fi, FI_NO_EXTENT); 357 need_update = true; 358 } 359 end_update: 360 write_unlock(&fi->ext_lock); 361 return need_update; 362 } 363 364 static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, 365 struct extent_tree *et, struct extent_info *ei, 366 struct rb_node *parent, struct rb_node **p) 367 { 368 struct extent_node *en; 369 370 en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); 371 if (!en) 372 return NULL; 373 374 en->ei = *ei; 375 INIT_LIST_HEAD(&en->list); 376 377 rb_link_node(&en->rb_node, parent, p); 378 rb_insert_color(&en->rb_node, &et->root); 379 et->count++; 380 atomic_inc(&sbi->total_ext_node); 381 return en; 382 } 383 384 static void __detach_extent_node(struct f2fs_sb_info *sbi, 385 struct extent_tree *et, struct extent_node *en) 386 { 387 rb_erase(&en->rb_node, &et->root); 388 et->count--; 389 atomic_dec(&sbi->total_ext_node); 390 391 if (et->cached_en == en) 392 et->cached_en = NULL; 393 } 394 395 static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi, 396 nid_t ino) 397 { 398 struct extent_tree *et; 399 400 down_read(&sbi->extent_tree_lock); 401 et = radix_tree_lookup(&sbi->extent_tree_root, ino); 402 if (!et) { 403 up_read(&sbi->extent_tree_lock); 404 return NULL; 405 } 406 atomic_inc(&et->refcount); 407 up_read(&sbi->extent_tree_lock); 408 409 return et; 410 } 411 412 static struct extent_tree *__grab_extent_tree(struct inode *inode) 413 { 414 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 415 struct extent_tree *et; 416 nid_t ino = inode->i_ino; 417 418 down_write(&sbi->extent_tree_lock); 419 et = radix_tree_lookup(&sbi->extent_tree_root, ino); 420 if (!et) { 421 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); 422 f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); 423 memset(et, 0, sizeof(struct extent_tree)); 424 et->ino = ino; 425 et->root = RB_ROOT; 426 et->cached_en = NULL; 427 rwlock_init(&et->lock); 428 atomic_set(&et->refcount, 0); 429 et->count = 0; 430 sbi->total_ext_tree++; 431 } 432 atomic_inc(&et->refcount); 433 up_write(&sbi->extent_tree_lock); 434 435 return et; 436 } 437 438 static struct extent_node *__lookup_extent_tree(struct extent_tree *et, 439 unsigned int fofs) 440 { 441 struct rb_node *node = et->root.rb_node; 442 struct extent_node *en; 443 444 if (et->cached_en) { 445 struct extent_info *cei = &et->cached_en->ei; 446 447 if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) 448 return et->cached_en; 449 } 450 451 while (node) { 452 en = rb_entry(node, struct extent_node, rb_node); 453 454 if (fofs < en->ei.fofs) { 455 node = node->rb_left; 456 } else if (fofs >= en->ei.fofs + en->ei.len) { 457 node = node->rb_right; 458 } else { 459 et->cached_en = en; 460 return en; 461 } 462 } 463 return NULL; 464 } 465 466 static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi, 467 struct extent_tree *et, struct extent_node *en) 468 { 469 struct extent_node *prev; 470 struct rb_node *node; 471 472 node = rb_prev(&en->rb_node); 473 if (!node) 474 return NULL; 475 476 prev = rb_entry(node, struct extent_node, rb_node); 477 if (__is_back_mergeable(&en->ei, &prev->ei)) { 478 en->ei.fofs = prev->ei.fofs; 479 en->ei.blk = prev->ei.blk; 480 en->ei.len += prev->ei.len; 481 __detach_extent_node(sbi, et, prev); 482 return prev; 483 } 484 return NULL; 485 } 486 487 static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi, 488 struct extent_tree *et, struct extent_node *en) 489 { 490 struct extent_node *next; 491 struct rb_node *node; 492 493 node = rb_next(&en->rb_node); 494 if (!node) 495 return NULL; 496 497 next = rb_entry(node, struct extent_node, rb_node); 498 if (__is_front_mergeable(&en->ei, &next->ei)) { 499 en->ei.len += next->ei.len; 500 __detach_extent_node(sbi, et, next); 501 return next; 502 } 503 return NULL; 504 } 505 506 static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, 507 struct extent_tree *et, struct extent_info *ei, 508 struct extent_node **den) 509 { 510 struct rb_node **p = &et->root.rb_node; 511 struct rb_node *parent = NULL; 512 struct extent_node *en; 513 514 while (*p) { 515 parent = *p; 516 en = rb_entry(parent, struct extent_node, rb_node); 517 518 if (ei->fofs < en->ei.fofs) { 519 if (__is_front_mergeable(ei, &en->ei)) { 520 f2fs_bug_on(sbi, !den); 521 en->ei.fofs = ei->fofs; 522 en->ei.blk = ei->blk; 523 en->ei.len += ei->len; 524 *den = __try_back_merge(sbi, et, en); 525 return en; 526 } 527 p = &(*p)->rb_left; 528 } else if (ei->fofs >= en->ei.fofs + en->ei.len) { 529 if (__is_back_mergeable(ei, &en->ei)) { 530 f2fs_bug_on(sbi, !den); 531 en->ei.len += ei->len; 532 *den = __try_front_merge(sbi, et, en); 533 return en; 534 } 535 p = &(*p)->rb_right; 536 } else { 537 f2fs_bug_on(sbi, 1); 538 } 539 } 540 541 return __attach_extent_node(sbi, et, ei, parent, p); 542 } 543 544 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, 545 struct extent_tree *et, bool free_all) 546 { 547 struct rb_node *node, *next; 548 struct extent_node *en; 549 unsigned int count = et->count; 550 551 node = rb_first(&et->root); 552 while (node) { 553 next = rb_next(node); 554 en = rb_entry(node, struct extent_node, rb_node); 555 556 if (free_all) { 557 spin_lock(&sbi->extent_lock); 558 if (!list_empty(&en->list)) 559 list_del_init(&en->list); 560 spin_unlock(&sbi->extent_lock); 561 } 562 563 if (free_all || list_empty(&en->list)) { 564 __detach_extent_node(sbi, et, en); 565 kmem_cache_free(extent_node_slab, en); 566 } 567 node = next; 568 } 569 570 return count - et->count; 571 } 572 573 static void f2fs_init_extent_tree(struct inode *inode, 574 struct f2fs_extent *i_ext) 575 { 576 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 577 struct extent_tree *et; 578 struct extent_node *en; 579 struct extent_info ei; 580 581 if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) 582 return; 583 584 et = __grab_extent_tree(inode); 585 586 write_lock(&et->lock); 587 if (et->count) 588 goto out; 589 590 set_extent_info(&ei, le32_to_cpu(i_ext->fofs), 591 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); 592 593 en = __insert_extent_tree(sbi, et, &ei, NULL); 594 if (en) { 595 et->cached_en = en; 596 597 spin_lock(&sbi->extent_lock); 598 list_add_tail(&en->list, &sbi->extent_list); 599 spin_unlock(&sbi->extent_lock); 600 } 601 out: 602 write_unlock(&et->lock); 603 atomic_dec(&et->refcount); 604 } 605 606 static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, 607 struct extent_info *ei) 608 { 609 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 610 struct extent_tree *et; 611 struct extent_node *en; 612 613 trace_f2fs_lookup_extent_tree_start(inode, pgofs); 614 615 et = __find_extent_tree(sbi, inode->i_ino); 616 if (!et) 617 return false; 618 619 read_lock(&et->lock); 620 en = __lookup_extent_tree(et, pgofs); 621 if (en) { 622 *ei = en->ei; 623 spin_lock(&sbi->extent_lock); 624 if (!list_empty(&en->list)) 625 list_move_tail(&en->list, &sbi->extent_list); 626 spin_unlock(&sbi->extent_lock); 627 stat_inc_read_hit(sbi->sb); 628 } 629 stat_inc_total_hit(sbi->sb); 630 read_unlock(&et->lock); 631 632 trace_f2fs_lookup_extent_tree_end(inode, pgofs, en); 633 634 atomic_dec(&et->refcount); 635 return en ? true : false; 636 } 637 638 static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, 639 block_t blkaddr) 640 { 641 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 642 struct extent_tree *et; 643 struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; 644 struct extent_node *den = NULL; 645 struct extent_info ei, dei; 646 unsigned int endofs; 647 648 trace_f2fs_update_extent_tree(inode, fofs, blkaddr); 649 650 et = __grab_extent_tree(inode); 651 652 write_lock(&et->lock); 653 654 /* 1. lookup and remove existing extent info in cache */ 655 en = __lookup_extent_tree(et, fofs); 656 if (!en) 657 goto update_extent; 658 659 dei = en->ei; 660 __detach_extent_node(sbi, et, en); 661 662 /* 2. if extent can be split more, split and insert the left part */ 663 if (dei.len > 1) { 664 /* insert left part of split extent into cache */ 665 if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) { 666 set_extent_info(&ei, dei.fofs, dei.blk, 667 fofs - dei.fofs); 668 en1 = __insert_extent_tree(sbi, et, &ei, NULL); 669 } 670 671 /* insert right part of split extent into cache */ 672 endofs = dei.fofs + dei.len - 1; 673 if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) { 674 set_extent_info(&ei, fofs + 1, 675 fofs - dei.fofs + dei.blk, endofs - fofs); 676 en2 = __insert_extent_tree(sbi, et, &ei, NULL); 677 } 678 } 679 680 update_extent: 681 /* 3. update extent in extent cache */ 682 if (blkaddr) { 683 set_extent_info(&ei, fofs, blkaddr, 1); 684 en3 = __insert_extent_tree(sbi, et, &ei, &den); 685 } 686 687 /* 4. update in global extent list */ 688 spin_lock(&sbi->extent_lock); 689 if (en && !list_empty(&en->list)) 690 list_del(&en->list); 691 /* 692 * en1 and en2 split from en, they will become more and more smaller 693 * fragments after splitting several times. So if the length is smaller 694 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree. 695 */ 696 if (en1) 697 list_add_tail(&en1->list, &sbi->extent_list); 698 if (en2) 699 list_add_tail(&en2->list, &sbi->extent_list); 700 if (en3) { 701 if (list_empty(&en3->list)) 702 list_add_tail(&en3->list, &sbi->extent_list); 703 else 704 list_move_tail(&en3->list, &sbi->extent_list); 705 } 706 if (den && !list_empty(&den->list)) 707 list_del(&den->list); 708 spin_unlock(&sbi->extent_lock); 709 710 /* 5. release extent node */ 711 if (en) 712 kmem_cache_free(extent_node_slab, en); 713 if (den) 714 kmem_cache_free(extent_node_slab, den); 715 716 write_unlock(&et->lock); 717 atomic_dec(&et->refcount); 718 } 719 720 void f2fs_preserve_extent_tree(struct inode *inode) 721 { 722 struct extent_tree *et; 723 struct extent_info *ext = &F2FS_I(inode)->ext; 724 bool sync = false; 725 726 if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) 727 return; 728 729 et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino); 730 if (!et) { 731 if (ext->len) { 732 ext->len = 0; 733 update_inode_page(inode); 734 } 735 return; 736 } 737 738 read_lock(&et->lock); 739 if (et->count) { 740 struct extent_node *en; 741 742 if (et->cached_en) { 743 en = et->cached_en; 744 } else { 745 struct rb_node *node = rb_first(&et->root); 746 747 if (!node) 748 node = rb_last(&et->root); 749 en = rb_entry(node, struct extent_node, rb_node); 750 } 751 752 if (__is_extent_same(ext, &en->ei)) 753 goto out; 754 755 *ext = en->ei; 756 sync = true; 757 } else if (ext->len) { 758 ext->len = 0; 759 sync = true; 760 } 761 out: 762 read_unlock(&et->lock); 763 atomic_dec(&et->refcount); 764 765 if (sync) 766 update_inode_page(inode); 767 } 768 769 void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 770 { 771 struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; 772 struct extent_node *en, *tmp; 773 unsigned long ino = F2FS_ROOT_INO(sbi); 774 struct radix_tree_iter iter; 775 void **slot; 776 unsigned int found; 777 unsigned int node_cnt = 0, tree_cnt = 0; 778 779 if (!test_opt(sbi, EXTENT_CACHE)) 780 return; 781 782 if (available_free_memory(sbi, EXTENT_CACHE)) 783 return; 784 785 spin_lock(&sbi->extent_lock); 786 list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { 787 if (!nr_shrink--) 788 break; 789 list_del_init(&en->list); 790 } 791 spin_unlock(&sbi->extent_lock); 792 793 down_read(&sbi->extent_tree_lock); 794 while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, 795 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { 796 unsigned i; 797 798 ino = treevec[found - 1]->ino + 1; 799 for (i = 0; i < found; i++) { 800 struct extent_tree *et = treevec[i]; 801 802 atomic_inc(&et->refcount); 803 write_lock(&et->lock); 804 node_cnt += __free_extent_tree(sbi, et, false); 805 write_unlock(&et->lock); 806 atomic_dec(&et->refcount); 807 } 808 } 809 up_read(&sbi->extent_tree_lock); 810 811 down_write(&sbi->extent_tree_lock); 812 radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter, 813 F2FS_ROOT_INO(sbi)) { 814 struct extent_tree *et = (struct extent_tree *)*slot; 815 816 if (!atomic_read(&et->refcount) && !et->count) { 817 radix_tree_delete(&sbi->extent_tree_root, et->ino); 818 kmem_cache_free(extent_tree_slab, et); 819 sbi->total_ext_tree--; 820 tree_cnt++; 821 } 822 } 823 up_write(&sbi->extent_tree_lock); 824 825 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); 826 } 827 828 void f2fs_destroy_extent_tree(struct inode *inode) 829 { 830 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 831 struct extent_tree *et; 832 unsigned int node_cnt = 0; 833 834 if (!test_opt(sbi, EXTENT_CACHE)) 835 return; 836 837 et = __find_extent_tree(sbi, inode->i_ino); 838 if (!et) 839 goto out; 840 841 /* free all extent info belong to this extent tree */ 842 write_lock(&et->lock); 843 node_cnt = __free_extent_tree(sbi, et, true); 844 write_unlock(&et->lock); 845 846 atomic_dec(&et->refcount); 847 848 /* try to find and delete extent tree entry in radix tree */ 849 down_write(&sbi->extent_tree_lock); 850 et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); 851 if (!et) { 852 up_write(&sbi->extent_tree_lock); 853 goto out; 854 } 855 f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); 856 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); 857 kmem_cache_free(extent_tree_slab, et); 858 sbi->total_ext_tree--; 859 up_write(&sbi->extent_tree_lock); 860 out: 861 trace_f2fs_destroy_extent_tree(inode, node_cnt); 862 return; 863 } 864 865 void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext) 866 { 867 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) 868 f2fs_init_extent_tree(inode, i_ext); 869 870 write_lock(&F2FS_I(inode)->ext_lock); 871 get_extent_info(&F2FS_I(inode)->ext, *i_ext); 872 write_unlock(&F2FS_I(inode)->ext_lock); 873 } 874 875 static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, 876 struct extent_info *ei) 877 { 878 if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) 879 return false; 880 881 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) 882 return f2fs_lookup_extent_tree(inode, pgofs, ei); 883 884 return lookup_extent_info(inode, pgofs, ei); 885 } 886 887 void f2fs_update_extent_cache(struct dnode_of_data *dn) 888 { 889 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 890 pgoff_t fofs; 891 892 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); 893 894 if (is_inode_flag_set(fi, FI_NO_EXTENT)) 895 return; 896 897 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 898 dn->ofs_in_node; 899 900 if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE)) 901 return f2fs_update_extent_tree(dn->inode, fofs, 902 dn->data_blkaddr); 903 904 if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) 905 sync_inode_page(dn); 906 } 907 908 struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) 909 { 910 struct address_space *mapping = inode->i_mapping; 911 struct dnode_of_data dn; 912 struct page *page; 913 struct extent_info ei; 914 int err; 915 struct f2fs_io_info fio = { 916 .type = DATA, 917 .rw = sync ? READ_SYNC : READA, 918 }; 919 920 /* 921 * If sync is false, it needs to check its block allocation. 922 * This is need and triggered by two flows: 923 * gc and truncate_partial_data_page. 924 */ 925 if (!sync) 926 goto search; 927 928 page = find_get_page(mapping, index); 929 if (page && PageUptodate(page)) 930 return page; 931 f2fs_put_page(page, 0); 932 search: 933 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 934 dn.data_blkaddr = ei.blk + index - ei.fofs; 935 goto got_it; 936 } 937 938 set_new_dnode(&dn, inode, NULL, NULL, 0); 939 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 940 if (err) 941 return ERR_PTR(err); 942 f2fs_put_dnode(&dn); 943 944 if (dn.data_blkaddr == NULL_ADDR) 945 return ERR_PTR(-ENOENT); 946 947 /* By fallocate(), there is no cached page, but with NEW_ADDR */ 948 if (unlikely(dn.data_blkaddr == NEW_ADDR)) 949 return ERR_PTR(-EINVAL); 950 951 got_it: 952 page = grab_cache_page(mapping, index); 953 if (!page) 954 return ERR_PTR(-ENOMEM); 955 956 if (PageUptodate(page)) { 957 unlock_page(page); 958 return page; 959 } 960 961 fio.blk_addr = dn.data_blkaddr; 962 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); 963 if (err) 964 return ERR_PTR(err); 965 966 if (sync) { 967 wait_on_page_locked(page); 968 if (unlikely(!PageUptodate(page))) { 969 f2fs_put_page(page, 0); 970 return ERR_PTR(-EIO); 971 } 972 } 973 return page; 974 } 975 976 /* 977 * If it tries to access a hole, return an error. 978 * Because, the callers, functions in dir.c and GC, should be able to know 979 * whether this page exists or not. 980 */ 981 struct page *get_lock_data_page(struct inode *inode, pgoff_t index) 982 { 983 struct address_space *mapping = inode->i_mapping; 984 struct dnode_of_data dn; 985 struct page *page; 986 struct extent_info ei; 987 int err; 988 struct f2fs_io_info fio = { 989 .type = DATA, 990 .rw = READ_SYNC, 991 }; 992 repeat: 993 page = grab_cache_page(mapping, index); 994 if (!page) 995 return ERR_PTR(-ENOMEM); 996 997 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 998 dn.data_blkaddr = ei.blk + index - ei.fofs; 999 goto got_it; 1000 } 1001 1002 set_new_dnode(&dn, inode, NULL, NULL, 0); 1003 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 1004 if (err) { 1005 f2fs_put_page(page, 1); 1006 return ERR_PTR(err); 1007 } 1008 f2fs_put_dnode(&dn); 1009 1010 if (unlikely(dn.data_blkaddr == NULL_ADDR)) { 1011 f2fs_put_page(page, 1); 1012 return ERR_PTR(-ENOENT); 1013 } 1014 1015 got_it: 1016 if (PageUptodate(page)) 1017 return page; 1018 1019 /* 1020 * A new dentry page is allocated but not able to be written, since its 1021 * new inode page couldn't be allocated due to -ENOSPC. 1022 * In such the case, its blkaddr can be remained as NEW_ADDR. 1023 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. 1024 */ 1025 if (dn.data_blkaddr == NEW_ADDR) { 1026 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 1027 SetPageUptodate(page); 1028 return page; 1029 } 1030 1031 fio.blk_addr = dn.data_blkaddr; 1032 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); 1033 if (err) 1034 return ERR_PTR(err); 1035 1036 lock_page(page); 1037 if (unlikely(!PageUptodate(page))) { 1038 f2fs_put_page(page, 1); 1039 return ERR_PTR(-EIO); 1040 } 1041 if (unlikely(page->mapping != mapping)) { 1042 f2fs_put_page(page, 1); 1043 goto repeat; 1044 } 1045 return page; 1046 } 1047 1048 /* 1049 * Caller ensures that this data page is never allocated. 1050 * A new zero-filled data page is allocated in the page cache. 1051 * 1052 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and 1053 * f2fs_unlock_op(). 1054 * Note that, ipage is set only by make_empty_dir. 1055 */ 1056 struct page *get_new_data_page(struct inode *inode, 1057 struct page *ipage, pgoff_t index, bool new_i_size) 1058 { 1059 struct address_space *mapping = inode->i_mapping; 1060 struct page *page; 1061 struct dnode_of_data dn; 1062 int err; 1063 1064 set_new_dnode(&dn, inode, ipage, NULL, 0); 1065 err = f2fs_reserve_block(&dn, index); 1066 if (err) 1067 return ERR_PTR(err); 1068 repeat: 1069 page = grab_cache_page(mapping, index); 1070 if (!page) { 1071 err = -ENOMEM; 1072 goto put_err; 1073 } 1074 1075 if (PageUptodate(page)) 1076 return page; 1077 1078 if (dn.data_blkaddr == NEW_ADDR) { 1079 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 1080 SetPageUptodate(page); 1081 } else { 1082 struct f2fs_io_info fio = { 1083 .type = DATA, 1084 .rw = READ_SYNC, 1085 .blk_addr = dn.data_blkaddr, 1086 }; 1087 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); 1088 if (err) 1089 goto put_err; 1090 1091 lock_page(page); 1092 if (unlikely(!PageUptodate(page))) { 1093 f2fs_put_page(page, 1); 1094 err = -EIO; 1095 goto put_err; 1096 } 1097 if (unlikely(page->mapping != mapping)) { 1098 f2fs_put_page(page, 1); 1099 goto repeat; 1100 } 1101 } 1102 1103 if (new_i_size && 1104 i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { 1105 i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); 1106 /* Only the directory inode sets new_i_size */ 1107 set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); 1108 } 1109 return page; 1110 1111 put_err: 1112 f2fs_put_dnode(&dn); 1113 return ERR_PTR(err); 1114 } 1115 1116 static int __allocate_data_block(struct dnode_of_data *dn) 1117 { 1118 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1119 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 1120 struct f2fs_summary sum; 1121 struct node_info ni; 1122 int seg = CURSEG_WARM_DATA; 1123 pgoff_t fofs; 1124 1125 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 1126 return -EPERM; 1127 1128 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); 1129 if (dn->data_blkaddr == NEW_ADDR) 1130 goto alloc; 1131 1132 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 1133 return -ENOSPC; 1134 1135 alloc: 1136 get_node_info(sbi, dn->nid, &ni); 1137 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1138 1139 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) 1140 seg = CURSEG_DIRECT_IO; 1141 1142 allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, 1143 &sum, seg); 1144 1145 /* direct IO doesn't use extent cache to maximize the performance */ 1146 set_data_blkaddr(dn); 1147 1148 /* update i_size */ 1149 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 1150 dn->ofs_in_node; 1151 if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) 1152 i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); 1153 1154 return 0; 1155 } 1156 1157 static void __allocate_data_blocks(struct inode *inode, loff_t offset, 1158 size_t count) 1159 { 1160 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1161 struct dnode_of_data dn; 1162 u64 start = F2FS_BYTES_TO_BLK(offset); 1163 u64 len = F2FS_BYTES_TO_BLK(count); 1164 bool allocated; 1165 u64 end_offset; 1166 1167 while (len) { 1168 f2fs_balance_fs(sbi); 1169 f2fs_lock_op(sbi); 1170 1171 /* When reading holes, we need its node page */ 1172 set_new_dnode(&dn, inode, NULL, NULL, 0); 1173 if (get_dnode_of_data(&dn, start, ALLOC_NODE)) 1174 goto out; 1175 1176 allocated = false; 1177 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 1178 1179 while (dn.ofs_in_node < end_offset && len) { 1180 block_t blkaddr; 1181 1182 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); 1183 if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { 1184 if (__allocate_data_block(&dn)) 1185 goto sync_out; 1186 allocated = true; 1187 } 1188 len--; 1189 start++; 1190 dn.ofs_in_node++; 1191 } 1192 1193 if (allocated) 1194 sync_inode_page(&dn); 1195 1196 f2fs_put_dnode(&dn); 1197 f2fs_unlock_op(sbi); 1198 } 1199 return; 1200 1201 sync_out: 1202 if (allocated) 1203 sync_inode_page(&dn); 1204 f2fs_put_dnode(&dn); 1205 out: 1206 f2fs_unlock_op(sbi); 1207 return; 1208 } 1209 1210 /* 1211 * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. 1212 * If original data blocks are allocated, then give them to blockdev. 1213 * Otherwise, 1214 * a. preallocate requested block addresses 1215 * b. do not use extent cache for better performance 1216 * c. give the block addresses to blockdev 1217 */ 1218 static int __get_data_block(struct inode *inode, sector_t iblock, 1219 struct buffer_head *bh_result, int create, bool fiemap) 1220 { 1221 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 1222 unsigned maxblocks = bh_result->b_size >> blkbits; 1223 struct dnode_of_data dn; 1224 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; 1225 pgoff_t pgofs, end_offset; 1226 int err = 0, ofs = 1; 1227 struct extent_info ei; 1228 bool allocated = false; 1229 1230 /* Get the page offset from the block offset(iblock) */ 1231 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); 1232 1233 if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) { 1234 f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result); 1235 goto out; 1236 } 1237 1238 if (create) 1239 f2fs_lock_op(F2FS_I_SB(inode)); 1240 1241 /* When reading holes, we need its node page */ 1242 set_new_dnode(&dn, inode, NULL, NULL, 0); 1243 err = get_dnode_of_data(&dn, pgofs, mode); 1244 if (err) { 1245 if (err == -ENOENT) 1246 err = 0; 1247 goto unlock_out; 1248 } 1249 if (dn.data_blkaddr == NEW_ADDR && !fiemap) 1250 goto put_out; 1251 1252 if (dn.data_blkaddr != NULL_ADDR) { 1253 clear_buffer_new(bh_result); 1254 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 1255 } else if (create) { 1256 err = __allocate_data_block(&dn); 1257 if (err) 1258 goto put_out; 1259 allocated = true; 1260 set_buffer_new(bh_result); 1261 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 1262 } else { 1263 goto put_out; 1264 } 1265 1266 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 1267 bh_result->b_size = (((size_t)1) << blkbits); 1268 dn.ofs_in_node++; 1269 pgofs++; 1270 1271 get_next: 1272 if (dn.ofs_in_node >= end_offset) { 1273 if (allocated) 1274 sync_inode_page(&dn); 1275 allocated = false; 1276 f2fs_put_dnode(&dn); 1277 1278 set_new_dnode(&dn, inode, NULL, NULL, 0); 1279 err = get_dnode_of_data(&dn, pgofs, mode); 1280 if (err) { 1281 if (err == -ENOENT) 1282 err = 0; 1283 goto unlock_out; 1284 } 1285 if (dn.data_blkaddr == NEW_ADDR && !fiemap) 1286 goto put_out; 1287 1288 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 1289 } 1290 1291 if (maxblocks > (bh_result->b_size >> blkbits)) { 1292 block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); 1293 if (blkaddr == NULL_ADDR && create) { 1294 err = __allocate_data_block(&dn); 1295 if (err) 1296 goto sync_out; 1297 allocated = true; 1298 set_buffer_new(bh_result); 1299 blkaddr = dn.data_blkaddr; 1300 } 1301 /* Give more consecutive addresses for the readahead */ 1302 if (blkaddr == (bh_result->b_blocknr + ofs)) { 1303 ofs++; 1304 dn.ofs_in_node++; 1305 pgofs++; 1306 bh_result->b_size += (((size_t)1) << blkbits); 1307 goto get_next; 1308 } 1309 } 1310 sync_out: 1311 if (allocated) 1312 sync_inode_page(&dn); 1313 put_out: 1314 f2fs_put_dnode(&dn); 1315 unlock_out: 1316 if (create) 1317 f2fs_unlock_op(F2FS_I_SB(inode)); 1318 out: 1319 trace_f2fs_get_data_block(inode, iblock, bh_result, err); 1320 return err; 1321 } 1322 1323 static int get_data_block(struct inode *inode, sector_t iblock, 1324 struct buffer_head *bh_result, int create) 1325 { 1326 return __get_data_block(inode, iblock, bh_result, create, false); 1327 } 1328 1329 static int get_data_block_fiemap(struct inode *inode, sector_t iblock, 1330 struct buffer_head *bh_result, int create) 1331 { 1332 return __get_data_block(inode, iblock, bh_result, create, true); 1333 } 1334 1335 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1336 u64 start, u64 len) 1337 { 1338 return generic_block_fiemap(inode, fieinfo, 1339 start, len, get_data_block_fiemap); 1340 } 1341 1342 static int f2fs_read_data_page(struct file *file, struct page *page) 1343 { 1344 struct inode *inode = page->mapping->host; 1345 int ret = -EAGAIN; 1346 1347 trace_f2fs_readpage(page, DATA); 1348 1349 /* If the file has inline data, try to read it directly */ 1350 if (f2fs_has_inline_data(inode)) 1351 ret = f2fs_read_inline_data(inode, page); 1352 if (ret == -EAGAIN) 1353 ret = mpage_readpage(page, get_data_block); 1354 1355 return ret; 1356 } 1357 1358 static int f2fs_read_data_pages(struct file *file, 1359 struct address_space *mapping, 1360 struct list_head *pages, unsigned nr_pages) 1361 { 1362 struct inode *inode = file->f_mapping->host; 1363 1364 /* If the file has inline data, skip readpages */ 1365 if (f2fs_has_inline_data(inode)) 1366 return 0; 1367 1368 return mpage_readpages(mapping, pages, nr_pages, get_data_block); 1369 } 1370 1371 int do_write_data_page(struct page *page, struct f2fs_io_info *fio) 1372 { 1373 struct inode *inode = page->mapping->host; 1374 struct dnode_of_data dn; 1375 int err = 0; 1376 1377 set_new_dnode(&dn, inode, NULL, NULL, 0); 1378 err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); 1379 if (err) 1380 return err; 1381 1382 fio->blk_addr = dn.data_blkaddr; 1383 1384 /* This page is already truncated */ 1385 if (fio->blk_addr == NULL_ADDR) { 1386 ClearPageUptodate(page); 1387 goto out_writepage; 1388 } 1389 1390 set_page_writeback(page); 1391 1392 /* 1393 * If current allocation needs SSR, 1394 * it had better in-place writes for updated data. 1395 */ 1396 if (unlikely(fio->blk_addr != NEW_ADDR && 1397 !is_cold_data(page) && 1398 need_inplace_update(inode))) { 1399 rewrite_data_page(page, fio); 1400 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 1401 trace_f2fs_do_write_data_page(page, IPU); 1402 } else { 1403 write_data_page(page, &dn, fio); 1404 set_data_blkaddr(&dn); 1405 f2fs_update_extent_cache(&dn); 1406 trace_f2fs_do_write_data_page(page, OPU); 1407 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); 1408 if (page->index == 0) 1409 set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); 1410 } 1411 out_writepage: 1412 f2fs_put_dnode(&dn); 1413 return err; 1414 } 1415 1416 static int f2fs_write_data_page(struct page *page, 1417 struct writeback_control *wbc) 1418 { 1419 struct inode *inode = page->mapping->host; 1420 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1421 loff_t i_size = i_size_read(inode); 1422 const pgoff_t end_index = ((unsigned long long) i_size) 1423 >> PAGE_CACHE_SHIFT; 1424 unsigned offset = 0; 1425 bool need_balance_fs = false; 1426 int err = 0; 1427 struct f2fs_io_info fio = { 1428 .type = DATA, 1429 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, 1430 }; 1431 1432 trace_f2fs_writepage(page, DATA); 1433 1434 if (page->index < end_index) 1435 goto write; 1436 1437 /* 1438 * If the offset is out-of-range of file size, 1439 * this page does not have to be written to disk. 1440 */ 1441 offset = i_size & (PAGE_CACHE_SIZE - 1); 1442 if ((page->index >= end_index + 1) || !offset) 1443 goto out; 1444 1445 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 1446 write: 1447 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 1448 goto redirty_out; 1449 if (f2fs_is_drop_cache(inode)) 1450 goto out; 1451 if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && 1452 available_free_memory(sbi, BASE_CHECK)) 1453 goto redirty_out; 1454 1455 /* Dentry blocks are controlled by checkpoint */ 1456 if (S_ISDIR(inode->i_mode)) { 1457 if (unlikely(f2fs_cp_error(sbi))) 1458 goto redirty_out; 1459 err = do_write_data_page(page, &fio); 1460 goto done; 1461 } 1462 1463 /* we should bypass data pages to proceed the kworkder jobs */ 1464 if (unlikely(f2fs_cp_error(sbi))) { 1465 SetPageError(page); 1466 goto out; 1467 } 1468 1469 if (!wbc->for_reclaim) 1470 need_balance_fs = true; 1471 else if (has_not_enough_free_secs(sbi, 0)) 1472 goto redirty_out; 1473 1474 err = -EAGAIN; 1475 f2fs_lock_op(sbi); 1476 if (f2fs_has_inline_data(inode)) 1477 err = f2fs_write_inline_data(inode, page); 1478 if (err == -EAGAIN) 1479 err = do_write_data_page(page, &fio); 1480 f2fs_unlock_op(sbi); 1481 done: 1482 if (err && err != -ENOENT) 1483 goto redirty_out; 1484 1485 clear_cold_data(page); 1486 out: 1487 inode_dec_dirty_pages(inode); 1488 if (err) 1489 ClearPageUptodate(page); 1490 unlock_page(page); 1491 if (need_balance_fs) 1492 f2fs_balance_fs(sbi); 1493 if (wbc->for_reclaim) 1494 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1495 return 0; 1496 1497 redirty_out: 1498 redirty_page_for_writepage(wbc, page); 1499 return AOP_WRITEPAGE_ACTIVATE; 1500 } 1501 1502 static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, 1503 void *data) 1504 { 1505 struct address_space *mapping = data; 1506 int ret = mapping->a_ops->writepage(page, wbc); 1507 mapping_set_error(mapping, ret); 1508 return ret; 1509 } 1510 1511 static int f2fs_write_data_pages(struct address_space *mapping, 1512 struct writeback_control *wbc) 1513 { 1514 struct inode *inode = mapping->host; 1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1516 bool locked = false; 1517 int ret; 1518 long diff; 1519 1520 trace_f2fs_writepages(mapping->host, wbc, DATA); 1521 1522 /* deal with chardevs and other special file */ 1523 if (!mapping->a_ops->writepage) 1524 return 0; 1525 1526 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && 1527 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && 1528 available_free_memory(sbi, DIRTY_DENTS)) 1529 goto skip_write; 1530 1531 /* during POR, we don't need to trigger writepage at all. */ 1532 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 1533 goto skip_write; 1534 1535 diff = nr_pages_to_write(sbi, DATA, wbc); 1536 1537 if (!S_ISDIR(inode->i_mode)) { 1538 mutex_lock(&sbi->writepages); 1539 locked = true; 1540 } 1541 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1542 if (locked) 1543 mutex_unlock(&sbi->writepages); 1544 1545 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1546 1547 remove_dirty_dir_inode(inode); 1548 1549 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); 1550 return ret; 1551 1552 skip_write: 1553 wbc->pages_skipped += get_dirty_pages(inode); 1554 return 0; 1555 } 1556 1557 static void f2fs_write_failed(struct address_space *mapping, loff_t to) 1558 { 1559 struct inode *inode = mapping->host; 1560 1561 if (to > inode->i_size) { 1562 truncate_pagecache(inode, inode->i_size); 1563 truncate_blocks(inode, inode->i_size, true); 1564 } 1565 } 1566 1567 static int f2fs_write_begin(struct file *file, struct address_space *mapping, 1568 loff_t pos, unsigned len, unsigned flags, 1569 struct page **pagep, void **fsdata) 1570 { 1571 struct inode *inode = mapping->host; 1572 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1573 struct page *page, *ipage; 1574 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 1575 struct dnode_of_data dn; 1576 int err = 0; 1577 1578 trace_f2fs_write_begin(inode, pos, len, flags); 1579 1580 f2fs_balance_fs(sbi); 1581 1582 /* 1583 * We should check this at this moment to avoid deadlock on inode page 1584 * and #0 page. The locking rule for inline_data conversion should be: 1585 * lock_page(page #0) -> lock_page(inode_page) 1586 */ 1587 if (index != 0) { 1588 err = f2fs_convert_inline_inode(inode); 1589 if (err) 1590 goto fail; 1591 } 1592 repeat: 1593 page = grab_cache_page_write_begin(mapping, index, flags); 1594 if (!page) { 1595 err = -ENOMEM; 1596 goto fail; 1597 } 1598 1599 *pagep = page; 1600 1601 f2fs_lock_op(sbi); 1602 1603 /* check inline_data */ 1604 ipage = get_node_page(sbi, inode->i_ino); 1605 if (IS_ERR(ipage)) { 1606 err = PTR_ERR(ipage); 1607 goto unlock_fail; 1608 } 1609 1610 set_new_dnode(&dn, inode, ipage, ipage, 0); 1611 1612 if (f2fs_has_inline_data(inode)) { 1613 if (pos + len <= MAX_INLINE_DATA) { 1614 read_inline_data(page, ipage); 1615 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); 1616 sync_inode_page(&dn); 1617 goto put_next; 1618 } 1619 err = f2fs_convert_inline_page(&dn, page); 1620 if (err) 1621 goto put_fail; 1622 } 1623 err = f2fs_reserve_block(&dn, index); 1624 if (err) 1625 goto put_fail; 1626 put_next: 1627 f2fs_put_dnode(&dn); 1628 f2fs_unlock_op(sbi); 1629 1630 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) 1631 return 0; 1632 1633 f2fs_wait_on_page_writeback(page, DATA); 1634 1635 if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { 1636 unsigned start = pos & (PAGE_CACHE_SIZE - 1); 1637 unsigned end = start + len; 1638 1639 /* Reading beyond i_size is simple: memset to zero */ 1640 zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); 1641 goto out; 1642 } 1643 1644 if (dn.data_blkaddr == NEW_ADDR) { 1645 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 1646 } else { 1647 struct f2fs_io_info fio = { 1648 .type = DATA, 1649 .rw = READ_SYNC, 1650 .blk_addr = dn.data_blkaddr, 1651 }; 1652 err = f2fs_submit_page_bio(sbi, page, &fio); 1653 if (err) 1654 goto fail; 1655 1656 lock_page(page); 1657 if (unlikely(!PageUptodate(page))) { 1658 f2fs_put_page(page, 1); 1659 err = -EIO; 1660 goto fail; 1661 } 1662 if (unlikely(page->mapping != mapping)) { 1663 f2fs_put_page(page, 1); 1664 goto repeat; 1665 } 1666 } 1667 out: 1668 SetPageUptodate(page); 1669 clear_cold_data(page); 1670 return 0; 1671 1672 put_fail: 1673 f2fs_put_dnode(&dn); 1674 unlock_fail: 1675 f2fs_unlock_op(sbi); 1676 f2fs_put_page(page, 1); 1677 fail: 1678 f2fs_write_failed(mapping, pos + len); 1679 return err; 1680 } 1681 1682 static int f2fs_write_end(struct file *file, 1683 struct address_space *mapping, 1684 loff_t pos, unsigned len, unsigned copied, 1685 struct page *page, void *fsdata) 1686 { 1687 struct inode *inode = page->mapping->host; 1688 1689 trace_f2fs_write_end(inode, pos, len, copied); 1690 1691 set_page_dirty(page); 1692 1693 if (pos + copied > i_size_read(inode)) { 1694 i_size_write(inode, pos + copied); 1695 mark_inode_dirty(inode); 1696 update_inode_page(inode); 1697 } 1698 1699 f2fs_put_page(page, 1); 1700 return copied; 1701 } 1702 1703 static int check_direct_IO(struct inode *inode, struct iov_iter *iter, 1704 loff_t offset) 1705 { 1706 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; 1707 1708 if (iov_iter_rw(iter) == READ) 1709 return 0; 1710 1711 if (offset & blocksize_mask) 1712 return -EINVAL; 1713 1714 if (iov_iter_alignment(iter) & blocksize_mask) 1715 return -EINVAL; 1716 1717 return 0; 1718 } 1719 1720 static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, 1721 loff_t offset) 1722 { 1723 struct file *file = iocb->ki_filp; 1724 struct address_space *mapping = file->f_mapping; 1725 struct inode *inode = mapping->host; 1726 size_t count = iov_iter_count(iter); 1727 int err; 1728 1729 /* we don't need to use inline_data strictly */ 1730 if (f2fs_has_inline_data(inode)) { 1731 err = f2fs_convert_inline_inode(inode); 1732 if (err) 1733 return err; 1734 } 1735 1736 if (check_direct_IO(inode, iter, offset)) 1737 return 0; 1738 1739 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 1740 1741 if (iov_iter_rw(iter) == WRITE) 1742 __allocate_data_blocks(inode, offset, count); 1743 1744 err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); 1745 if (err < 0 && iov_iter_rw(iter) == WRITE) 1746 f2fs_write_failed(mapping, offset + count); 1747 1748 trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); 1749 1750 return err; 1751 } 1752 1753 void f2fs_invalidate_page(struct page *page, unsigned int offset, 1754 unsigned int length) 1755 { 1756 struct inode *inode = page->mapping->host; 1757 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1758 1759 if (inode->i_ino >= F2FS_ROOT_INO(sbi) && 1760 (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)) 1761 return; 1762 1763 if (PageDirty(page)) { 1764 if (inode->i_ino == F2FS_META_INO(sbi)) 1765 dec_page_count(sbi, F2FS_DIRTY_META); 1766 else if (inode->i_ino == F2FS_NODE_INO(sbi)) 1767 dec_page_count(sbi, F2FS_DIRTY_NODES); 1768 else 1769 inode_dec_dirty_pages(inode); 1770 } 1771 ClearPagePrivate(page); 1772 } 1773 1774 int f2fs_release_page(struct page *page, gfp_t wait) 1775 { 1776 /* If this is dirty page, keep PagePrivate */ 1777 if (PageDirty(page)) 1778 return 0; 1779 1780 ClearPagePrivate(page); 1781 return 1; 1782 } 1783 1784 static int f2fs_set_data_page_dirty(struct page *page) 1785 { 1786 struct address_space *mapping = page->mapping; 1787 struct inode *inode = mapping->host; 1788 1789 trace_f2fs_set_page_dirty(page, DATA); 1790 1791 SetPageUptodate(page); 1792 1793 if (f2fs_is_atomic_file(inode)) { 1794 register_inmem_page(inode, page); 1795 return 1; 1796 } 1797 1798 mark_inode_dirty(inode); 1799 1800 if (!PageDirty(page)) { 1801 __set_page_dirty_nobuffers(page); 1802 update_dirty_page(inode, page); 1803 return 1; 1804 } 1805 return 0; 1806 } 1807 1808 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) 1809 { 1810 struct inode *inode = mapping->host; 1811 1812 /* we don't need to use inline_data strictly */ 1813 if (f2fs_has_inline_data(inode)) { 1814 int err = f2fs_convert_inline_inode(inode); 1815 if (err) 1816 return err; 1817 } 1818 return generic_block_bmap(mapping, block, get_data_block); 1819 } 1820 1821 void init_extent_cache_info(struct f2fs_sb_info *sbi) 1822 { 1823 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); 1824 init_rwsem(&sbi->extent_tree_lock); 1825 INIT_LIST_HEAD(&sbi->extent_list); 1826 spin_lock_init(&sbi->extent_lock); 1827 sbi->total_ext_tree = 0; 1828 atomic_set(&sbi->total_ext_node, 0); 1829 } 1830 1831 int __init create_extent_cache(void) 1832 { 1833 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", 1834 sizeof(struct extent_tree)); 1835 if (!extent_tree_slab) 1836 return -ENOMEM; 1837 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", 1838 sizeof(struct extent_node)); 1839 if (!extent_node_slab) { 1840 kmem_cache_destroy(extent_tree_slab); 1841 return -ENOMEM; 1842 } 1843 return 0; 1844 } 1845 1846 void destroy_extent_cache(void) 1847 { 1848 kmem_cache_destroy(extent_node_slab); 1849 kmem_cache_destroy(extent_tree_slab); 1850 } 1851 1852 const struct address_space_operations f2fs_dblock_aops = { 1853 .readpage = f2fs_read_data_page, 1854 .readpages = f2fs_read_data_pages, 1855 .writepage = f2fs_write_data_page, 1856 .writepages = f2fs_write_data_pages, 1857 .write_begin = f2fs_write_begin, 1858 .write_end = f2fs_write_end, 1859 .set_page_dirty = f2fs_set_data_page_dirty, 1860 .invalidatepage = f2fs_invalidate_page, 1861 .releasepage = f2fs_release_page, 1862 .direct_IO = f2fs_direct_IO, 1863 .bmap = f2fs_bmap, 1864 }; 1865